diff options
Diffstat (limited to 'src/cmd/godoc')
-rw-r--r-- | src/cmd/godoc/doc.go | 5 | ||||
-rw-r--r-- | src/cmd/godoc/format.go | 68 | ||||
-rw-r--r-- | src/cmd/godoc/godoc.go | 120 | ||||
-rw-r--r-- | src/cmd/godoc/index.go | 151 | ||||
-rw-r--r-- | src/cmd/godoc/main.go | 14 | ||||
-rwxr-xr-x | src/cmd/godoc/snippet.go | 2 | ||||
-rw-r--r-- | src/cmd/godoc/spec.go | 3 | ||||
-rw-r--r-- | src/cmd/godoc/utils.go | 62 |
8 files changed, 279 insertions, 146 deletions
diff --git a/src/cmd/godoc/doc.go b/src/cmd/godoc/doc.go index 02779384c..f0006e750 100644 --- a/src/cmd/godoc/doc.go +++ b/src/cmd/godoc/doc.go @@ -47,8 +47,9 @@ The flags are: width of tabs in units of spaces -timestamps=true show timestamps with directory listings - -fulltext=false - build full text index for regular expression queries + -maxresults=10000 + maximum number of full text search results shown + (no full text index is built if maxresults <= 0) -path="" additional package directories (colon-separated) -html diff --git a/src/cmd/godoc/format.go b/src/cmd/godoc/format.go index f68c67b24..66b01aa64 100644 --- a/src/cmd/godoc/format.go +++ b/src/cmd/godoc/format.go @@ -62,12 +62,48 @@ func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, if lw != nil { selections = append(selections, links) } + // compute the sequence of consecutive segment changes changes := newMerger(selections) + // The i'th bit in bitset indicates that the text // at the current offset is covered by selections[i]. bitset := 0 lastOffs := 0 + + // Text segments are written in a delayed fashion + // such that consecutive segments belonging to the + // same selection can be combined (peephole optimization). + // last describes the last segment which has not yet been written. + var last struct { + begin, end int // valid if begin < end + bitset int + } + + // flush writes the last delayed text segment + flush := func() { + if last.begin < last.end { + sw(w, text[last.begin:last.end], last.bitset) + } + last.begin = last.end // invalidate last + } + + // segment runs the segment [lastOffs, end) with the selection + // indicated by bitset through the segment peephole optimizer. + segment := func(end int) { + if lastOffs < end { // ignore empty segments + if last.end != lastOffs || last.bitset != bitset { + // the last segment is not adjacent to or + // differs from the new one + flush() + // start a new segment + last.begin = lastOffs + } + last.end = end + last.bitset = bitset + } + } + for { // get the next segment change index, offs, start := changes.next() @@ -81,14 +117,15 @@ func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, // we have a link segment change: // format the previous selection segment, write the // link tag and start a new selection segment - sw(w, text[lastOffs:offs], bitset) + segment(offs) + flush() lastOffs = offs lw(w, offs, start) } else { // we have a selection change: // format the previous selection segment, determine // the new selection bitset and start a new segment - sw(w, text[lastOffs:offs], bitset) + segment(offs) lastOffs = offs mask := 1 << uint(index) if start { @@ -98,7 +135,8 @@ func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, } } } - sw(w, text[lastOffs:], bitset) + segment(len(text)) + flush() } @@ -201,7 +239,9 @@ func lineSelection(text []byte) Selection { // func commentSelection(src []byte) Selection { var s scanner.Scanner - file := s.Init(token.NewFileSet(), "", src, nil, scanner.ScanComments+scanner.InsertSemis) + fset := token.NewFileSet() + file := fset.AddFile("", fset.Base(), len(src)) + s.Init(file, src, nil, scanner.ScanComments+scanner.InsertSemis) return func() (seg []int) { for { pos, tok, lit := s.Scan() @@ -283,17 +323,15 @@ var endTag = []byte(`</span>`) func selectionTag(w io.Writer, text []byte, selections int) { - if len(text) > 0 { - if selections < len(startTags) { - if tag := startTags[selections]; len(tag) > 0 { - w.Write(tag) - template.HTMLEscape(w, text) - w.Write(endTag) - return - } + if selections < len(startTags) { + if tag := startTags[selections]; len(tag) > 0 { + w.Write(tag) + template.HTMLEscape(w, text) + w.Write(endTag) + return } - template.HTMLEscape(w, text) } + template.HTMLEscape(w, text) } @@ -322,12 +360,12 @@ func FormatText(text []byte, line int, goSource bool, pattern string, selection if pattern != "" { highlights = regexpSelection(text, pattern) } - if comments != nil || highlights != nil || selection != nil { + if line >= 0 || comments != nil || highlights != nil || selection != nil { var lineTag LinkWriter if line >= 0 { lineTag = func(w io.Writer, _ int, start bool) { if start { - fmt.Fprintf(w, "<a id=\"L%d\"></a>%5d\t", line, line) + fmt.Fprintf(w, "<a id=\"L%d\"></a><span class=\"ln\">%6d</span>\t", line, line) line++ } } diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index d6054ab9d..6a00a3e70 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -25,7 +25,6 @@ import ( "strings" "template" "time" - "utf8" ) @@ -56,7 +55,7 @@ var ( // TODO(gri) consider the invariant that goroot always end in '/' goroot = flag.String("goroot", runtime.GOROOT(), "Go root directory") testDir = flag.String("testdir", "", "Go root subdirectory - for testing only (faster startups)") - path = flag.String("path", "", "additional package directories (colon-separated)") + pkgPath = flag.String("path", "", "additional package directories (colon-separated)") filter = flag.String("filter", "", "filter file containing permitted package directory paths") filterMin = flag.Int("filter_minutes", 0, "filter file update interval in minutes; disabled if <= 0") filterDelay delayTime // actual filter update interval in minutes; usually filterDelay == filterMin, but filterDelay may back off exponentially @@ -64,7 +63,7 @@ var ( // layout control tabwidth = flag.Int("tabwidth", 4, "tab width") showTimestamps = flag.Bool("timestamps", true, "show timestamps with directory listings") - fulltextIndex = flag.Bool("fulltext", false, "build full text index for regular expression queries") + maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown") // file system mapping fsMap Mapping // user-defined mapping @@ -80,7 +79,7 @@ var ( func initHandlers() { - fsMap.Init(*path) + fsMap.Init(*pkgPath) fileServer = http.FileServer(*goroot, "") cmdHandler = httpHandler{"/cmd/", pathutil.Join(*goroot, "src/cmd"), false} pkgHandler = httpHandler{"/pkg/", pathutil.Join(*goroot, "src/pkg"), true} @@ -626,11 +625,11 @@ func readTemplate(name string) *template.Template { path := pathutil.Join(*goroot, "lib/godoc/"+name) data, err := ioutil.ReadFile(path) if err != nil { - log.Exitf("ReadFile %s: %v", path, err) + log.Fatalf("ReadFile %s: %v", path, err) } t, err := template.Parse(string(data), fmap) if err != nil { - log.Exitf("%s: %v", name, err) + log.Fatalf("%s: %v", name, err) } return t } @@ -768,53 +767,6 @@ func redirect(w http.ResponseWriter, r *http.Request) (redirected bool) { } -// TODO(gri): Should have a mapping from extension to handler, eventually. - -// textExt[x] is true if the extension x indicates a text file, and false otherwise. -var textExt = map[string]bool{ - ".css": false, // must be served raw - ".js": false, // must be served raw -} - - -func isTextFile(path string) bool { - // if the extension is known, use it for decision making - if isText, found := textExt[pathutil.Ext(path)]; found { - return isText - } - - // the extension is not known; read an initial chunk of - // file and check if it looks like correct UTF-8; if it - // does, it's probably a text file - f, err := os.Open(path, os.O_RDONLY, 0) - if err != nil { - return false - } - defer f.Close() - - var buf [1024]byte - n, err := f.Read(buf[0:]) - if err != nil { - return false - } - - s := string(buf[0:n]) - n -= utf8.UTFMax // make sure there's enough bytes for a complete unicode char - for i, c := range s { - if i > n { - break - } - if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' { - // decoding error or control character - not a text file - return false - } - } - - // likely a text file - return true -} - - func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) { src, err := ioutil.ReadFile(abspath) if err != nil { @@ -1159,41 +1111,47 @@ type SearchResult struct { func lookup(query string) (result SearchResult) { result.Query = query - // determine identifier lookup string and full text regexp - lookupStr := "" - lookupRx, err := regexp.Compile(query) - if err != nil { - result.Alert = "Error in query regular expression: " + err.String() - return - } - if prefix, complete := lookupRx.LiteralPrefix(); complete { - // otherwise we lookup "" (with no result) because - // identifier lookup doesn't support regexp search - lookupStr = prefix - } + index, timestamp := searchIndex.get() + if index != nil { + index := index.(*Index) - if index, timestamp := searchIndex.get(); index != nil { // identifier search - index := index.(*Index) - result.Hit, result.Alt, err = index.Lookup(lookupStr) - if err != nil && !*fulltextIndex { - // ignore the error if there is full text search - // since it accepts that query regular expression + var err os.Error + result.Hit, result.Alt, err = index.Lookup(query) + if err != nil && *maxResults <= 0 { + // ignore the error if full text search is enabled + // since the query may be a valid regular expression result.Alert = "Error in query string: " + err.String() return } - // textual search - // TODO(gri) should max be a flag? - const max = 10000 // show at most this many fulltext results - result.Found, result.Textual = index.LookupRegexp(lookupRx, max+1) - result.Complete = result.Found <= max - - // is the result accurate? - if _, ts := fsModified.get(); timestamp < ts { - result.Alert = "Indexing in progress: result may be inaccurate" + // full text search + if *maxResults > 0 && query != "" { + rx, err := regexp.Compile(query) + if err != nil { + result.Alert = "Error in query regular expression: " + err.String() + return + } + // If we get maxResults+1 results we know that there are more than + // maxResults results and thus the result may be incomplete (to be + // precise, we should remove one result from the result set, but + // nobody is going to count the results on the result page). + result.Found, result.Textual = index.LookupRegexp(rx, *maxResults+1) + result.Complete = result.Found <= *maxResults + if !result.Complete { + result.Found-- // since we looked for maxResults+1 + } } } + + // is the result accurate? + if _, ts := fsModified.get(); timestamp < ts { + // The index is older than the latest file system change + // under godoc's observation. Indexing may be in progress + // or start shortly (see indexer()). + result.Alert = "Indexing in progress: result may be inaccurate" + } + return } @@ -1278,7 +1236,7 @@ func indexer() { log.Printf("updating index...") } start := time.Nanoseconds() - index := NewIndex(fsDirnames(), *fulltextIndex) + index := NewIndex(fsDirnames(), *maxResults > 0) stop := time.Nanoseconds() searchIndex.set(index) if *verbose { diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index ba6fe9acd..581409cde 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -47,7 +47,7 @@ import ( "index/suffixarray" "io/ioutil" "os" - pathutil "path" + "path" "regexp" "sort" "strings" @@ -430,8 +430,9 @@ func (a *AltWords) filter(s string) *AltWords { // Indexer // Adjust these flags as seems best. -const excludeMainPackages = false -const excludeTestFiles = false +const includeNonGoFiles = true +const includeMainPackages = true +const includeTestFiles = true type IndexResult struct { @@ -619,11 +620,14 @@ func pkgName(filename string) string { } -func (x *Indexer) addFile(filename string) *ast.File { +// addFile adds a file to the index if possible and returns the file set file +// and the file's AST if it was successfully parsed as a Go file. If addFile +// failed (that is, if the file was not added), it returns file == nil. +func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { // open file f, err := os.Open(filename, os.O_RDONLY, 0) if err != nil { - return nil + return } defer f.Close() @@ -643,59 +647,127 @@ func (x *Indexer) addFile(filename string) *ast.File { panic("internal error - file base incorrect") } - // append file contents to x.sources - if _, err := x.sources.ReadFrom(f); err != nil { - x.sources.Truncate(base) // discard possibly added data - return nil // ignore files with I/O errors - } + // append file contents (src) to x.sources + if _, err := x.sources.ReadFrom(f); err == nil { + src := x.sources.Bytes()[base:] - // parse the file and in the process add it to the file set - src := x.sources.Bytes()[base:] // no need to reread the file - file, err := parser.ParseFile(x.fset, filename, src, parser.ParseComments) - if err != nil { - // do not discard the added source code in this case - // because the file has been added to the file set and - // the source size must match the file set base - // TODO(gri): given a FileSet.RemoveFile() one might be - // able to discard the data here (worthwhile?) - return nil // ignore files with (parse) errors + if goFile { + // parse the file and in the process add it to the file set + if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { + file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file + return + } + // file has parse errors, and the AST may be incorrect - + // set lines information explicitly and index as ordinary + // text file (cannot fall through to the text case below + // because the file has already been added to the file set + // by the parser) + file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file + file.SetLinesForContent(src) + ast = nil + return + } + + if isText(src) { + // only add the file to the file set (for the full text index) + file = x.fset.AddFile(filename, x.fset.Base(), len(src)) + file.SetLinesForContent(src) + return + } } - return file + // discard possibly added data + x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added + return } -func (x *Indexer) visitFile(dirname string, f *os.FileInfo) { - if !isGoFile(f) { - return +// Design note: Using an explicit white list of permitted files for indexing +// makes sure that the important files are included and massively reduces the +// number of files to index. The advantage over a blacklist is that unexpected +// (non-blacklisted) files won't suddenly explode the index. +// +// TODO(gri): We may want to make this list customizable, perhaps via a flag. + +// Files are whitelisted if they have a file name or extension +// present as key in whitelisted. +var whitelisted = map[string]bool{ + ".bash": true, + ".c": true, + ".css": true, + ".go": true, + ".goc": true, + ".h": true, + ".html": true, + ".js": true, + ".out": true, + ".py": true, + ".s": true, + ".sh": true, + ".txt": true, + ".xml": true, + "AUTHORS": true, + "CONTRIBUTORS": true, + "LICENSE": true, + "Makefile": true, + "PATENTS": true, + "README": true, +} + + +// isWhitelisted returns true if a file is on the list +// of "permitted" files for indexing. The filename must +// be the directory-local name of the file. +func isWhitelisted(filename string) bool { + key := path.Ext(filename) + if key == "" { + // file has no extension - use entire filename + key = filename } + return whitelisted[key] +} - path := pathutil.Join(dirname, f.Name) - if excludeTestFiles && (!isPkgFile(f) || strings.HasPrefix(path, "test/")) { + +func (x *Indexer) visitFile(dirname string, f *os.FileInfo) { + if !f.IsRegular() { return } - if excludeMainPackages && pkgName(path) == "main" { + filename := path.Join(dirname, f.Name) + goFile := false + + switch { + case isGoFile(f): + if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { + return + } + if !includeMainPackages && pkgName(filename) == "main" { + return + } + goFile = true + + case !includeNonGoFiles || !isWhitelisted(f.Name): return } - file := x.addFile(path) + file, fast := x.addFile(filename, goFile) if file == nil { - return + return // addFile failed } - // we've got a file to index - x.current = x.fset.File(file.Pos()) // file.Pos is in the current file - dir, _ := pathutil.Split(path) - pak := Pak{dir, file.Name.Name} - x.file = &File{path, pak} - ast.Walk(x, file) + if fast != nil { + // we've got a Go file to index + x.current = file + dir, _ := path.Split(filename) + pak := Pak{dir, fast.Name.Name} + x.file = &File{filename, pak} + ast.Walk(x, fast) + } // update statistics - // (count real file size as opposed to using the padded x.sources.Len()) - x.stats.Bytes += x.current.Size() + x.stats.Bytes += file.Size() x.stats.Files++ - x.stats.Lines += x.current.LineCount() + x.stats.Lines += file.LineCount() } @@ -817,7 +889,8 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) { func isIdentifier(s string) bool { var S scanner.Scanner - S.Init(token.NewFileSet(), "", []byte(s), nil, 0) + fset := token.NewFileSet() + S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0) if _, tok, _ := S.Scan(); tok == token.IDENT { _, tok, _ := S.Scan() return tok == token.EOF diff --git a/src/cmd/godoc/main.go b/src/cmd/godoc/main.go index fe3d22fb9..f1b11a760 100644 --- a/src/cmd/godoc/main.go +++ b/src/cmd/godoc/main.go @@ -227,7 +227,7 @@ func main() { } if *tabwidth < 0 { - log.Exitf("negative tabwidth %d", *tabwidth) + log.Fatalf("negative tabwidth %d", *tabwidth) } initHandlers() @@ -242,8 +242,8 @@ func main() { log.Printf("address = %s", *httpAddr) log.Printf("goroot = %s", *goroot) log.Printf("tabwidth = %d", *tabwidth) - if *fulltextIndex { - log.Print("full text index enabled") + if *maxResults > 0 { + log.Printf("maxresults = %d (full text index enabled)", *maxResults) } if !fsMap.IsEmpty() { log.Print("user-defined mapping:") @@ -284,7 +284,7 @@ func main() { // Start http server. if err := http.ListenAndServe(*httpAddr, handler); err != nil { - log.Exitf("ListenAndServe %s: %v", *httpAddr, err) + log.Fatalf("ListenAndServe %s: %v", *httpAddr, err) } return @@ -301,7 +301,7 @@ func main() { for i := 0; i < flag.NArg(); i++ { res, err := remoteSearch(flag.Arg(i)) if err != nil { - log.Exitf("remoteSearch: %s", err) + log.Fatalf("remoteSearch: %s", err) } io.Copy(os.Stdout, res.Body) } @@ -344,7 +344,7 @@ func main() { info = cmdHandler.getPageInfo(abspath, relpath, "", mode) } if info.Err != nil { - log.Exitf("%v", info.Err) + log.Fatalf("%v", info.Err) } // If we have more than one argument, use the remaining arguments for filtering @@ -352,7 +352,7 @@ func main() { args := flag.Args()[1:] rx := makeRx(args) if rx == nil { - log.Exitf("illegal regular expression from %v", args) + log.Fatalf("illegal regular expression from %v", args) } filter := func(s string) bool { return rx.MatchString(s) } diff --git a/src/cmd/godoc/snippet.go b/src/cmd/godoc/snippet.go index 6a12febe1..c2838ed5a 100755 --- a/src/cmd/godoc/snippet.go +++ b/src/cmd/godoc/snippet.go @@ -26,7 +26,7 @@ type Snippet struct { func newSnippet(fset *token.FileSet, decl ast.Decl, id *ast.Ident) *Snippet { // TODO instead of pretty-printing the node, should use the original source instead var buf bytes.Buffer - writeNode(&buf, fset, decl, true) + writeNode(&buf, fset, decl, false) return &Snippet{fset.Position(id.Pos()).Line, FormatText(buf.Bytes(), -1, true, id.Name, nil)} } diff --git a/src/cmd/godoc/spec.go b/src/cmd/godoc/spec.go index b1c1a883f..a533c1e0a 100644 --- a/src/cmd/godoc/spec.go +++ b/src/cmd/godoc/spec.go @@ -156,7 +156,8 @@ func (p *ebnfParser) parse(fset *token.FileSet, out io.Writer, src []byte) { // initialize ebnfParser p.out = out p.src = src - p.file = p.scanner.Init(fset, "", src, p, 0) + p.file = fset.AddFile("", fset.Base(), len(src)) + p.scanner.Init(p.file, src, p, 0) p.next() // initializes pos, tok, lit // process source diff --git a/src/cmd/godoc/utils.go b/src/cmd/godoc/utils.go index 55cf87841..a032bd331 100644 --- a/src/cmd/godoc/utils.go +++ b/src/cmd/godoc/utils.go @@ -15,11 +15,13 @@ import ( "strings" "sync" "time" + "utf8" ) // An RWValue wraps a value and permits mutually exclusive // access to it and records the time the value was last set. +// type RWValue struct { mutex sync.RWMutex value interface{} @@ -107,3 +109,63 @@ func writeFileAtomically(filename string, data []byte) os.Error { } return os.Rename(f.Name(), filename) } + + +// isText returns true if a significant prefix of s looks like correct UTF-8; +// that is, if it is likely that s is human-readable text. +// +func isText(s []byte) bool { + const max = 1024 // at least utf8.UTFMax + if len(s) > max { + s = s[0:max] + } + for i, c := range string(s) { + if i+utf8.UTFMax > len(s) { + // last char may be incomplete - ignore + break + } + if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' { + // decoding error or control character - not a text file + return false + } + } + return true +} + + +// TODO(gri): Should have a mapping from extension to handler, eventually. + +// textExt[x] is true if the extension x indicates a text file, and false otherwise. +var textExt = map[string]bool{ + ".css": false, // must be served raw + ".js": false, // must be served raw +} + + +// isTextFile returns true if the file has a known extension indicating +// a text file, or if a significant chunk of the specified file looks like +// correct UTF-8; that is, if it is likely that the file contains human- +// readable text. +// +func isTextFile(filename string) bool { + // if the extension is known, use it for decision making + if isText, found := textExt[pathutil.Ext(filename)]; found { + return isText + } + + // the extension is not known; read an initial chunk + // of the file and check if it looks like text + f, err := os.Open(filename, os.O_RDONLY, 0) + if err != nil { + return false + } + defer f.Close() + + var buf [1024]byte + n, err := f.Read(buf[0:]) + if err != nil { + return false + } + + return isText(buf[0:n]) +} |