diff options
Diffstat (limited to 'src/cmd/godoc/index.go')
-rw-r--r-- | src/cmd/godoc/index.go | 151 |
1 files changed, 112 insertions, 39 deletions
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index ba6fe9acd..581409cde 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -47,7 +47,7 @@ import ( "index/suffixarray" "io/ioutil" "os" - pathutil "path" + "path" "regexp" "sort" "strings" @@ -430,8 +430,9 @@ func (a *AltWords) filter(s string) *AltWords { // Indexer // Adjust these flags as seems best. -const excludeMainPackages = false -const excludeTestFiles = false +const includeNonGoFiles = true +const includeMainPackages = true +const includeTestFiles = true type IndexResult struct { @@ -619,11 +620,14 @@ func pkgName(filename string) string { } -func (x *Indexer) addFile(filename string) *ast.File { +// addFile adds a file to the index if possible and returns the file set file +// and the file's AST if it was successfully parsed as a Go file. If addFile +// failed (that is, if the file was not added), it returns file == nil. +func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { // open file f, err := os.Open(filename, os.O_RDONLY, 0) if err != nil { - return nil + return } defer f.Close() @@ -643,59 +647,127 @@ func (x *Indexer) addFile(filename string) *ast.File { panic("internal error - file base incorrect") } - // append file contents to x.sources - if _, err := x.sources.ReadFrom(f); err != nil { - x.sources.Truncate(base) // discard possibly added data - return nil // ignore files with I/O errors - } + // append file contents (src) to x.sources + if _, err := x.sources.ReadFrom(f); err == nil { + src := x.sources.Bytes()[base:] - // parse the file and in the process add it to the file set - src := x.sources.Bytes()[base:] // no need to reread the file - file, err := parser.ParseFile(x.fset, filename, src, parser.ParseComments) - if err != nil { - // do not discard the added source code in this case - // because the file has been added to the file set and - // the source size must match the file set base - // TODO(gri): given a FileSet.RemoveFile() one might be - // able to discard the data here (worthwhile?) - return nil // ignore files with (parse) errors + if goFile { + // parse the file and in the process add it to the file set + if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { + file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file + return + } + // file has parse errors, and the AST may be incorrect - + // set lines information explicitly and index as ordinary + // text file (cannot fall through to the text case below + // because the file has already been added to the file set + // by the parser) + file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file + file.SetLinesForContent(src) + ast = nil + return + } + + if isText(src) { + // only add the file to the file set (for the full text index) + file = x.fset.AddFile(filename, x.fset.Base(), len(src)) + file.SetLinesForContent(src) + return + } } - return file + // discard possibly added data + x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added + return } -func (x *Indexer) visitFile(dirname string, f *os.FileInfo) { - if !isGoFile(f) { - return +// Design note: Using an explicit white list of permitted files for indexing +// makes sure that the important files are included and massively reduces the +// number of files to index. The advantage over a blacklist is that unexpected +// (non-blacklisted) files won't suddenly explode the index. +// +// TODO(gri): We may want to make this list customizable, perhaps via a flag. + +// Files are whitelisted if they have a file name or extension +// present as key in whitelisted. +var whitelisted = map[string]bool{ + ".bash": true, + ".c": true, + ".css": true, + ".go": true, + ".goc": true, + ".h": true, + ".html": true, + ".js": true, + ".out": true, + ".py": true, + ".s": true, + ".sh": true, + ".txt": true, + ".xml": true, + "AUTHORS": true, + "CONTRIBUTORS": true, + "LICENSE": true, + "Makefile": true, + "PATENTS": true, + "README": true, +} + + +// isWhitelisted returns true if a file is on the list +// of "permitted" files for indexing. The filename must +// be the directory-local name of the file. +func isWhitelisted(filename string) bool { + key := path.Ext(filename) + if key == "" { + // file has no extension - use entire filename + key = filename } + return whitelisted[key] +} - path := pathutil.Join(dirname, f.Name) - if excludeTestFiles && (!isPkgFile(f) || strings.HasPrefix(path, "test/")) { + +func (x *Indexer) visitFile(dirname string, f *os.FileInfo) { + if !f.IsRegular() { return } - if excludeMainPackages && pkgName(path) == "main" { + filename := path.Join(dirname, f.Name) + goFile := false + + switch { + case isGoFile(f): + if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { + return + } + if !includeMainPackages && pkgName(filename) == "main" { + return + } + goFile = true + + case !includeNonGoFiles || !isWhitelisted(f.Name): return } - file := x.addFile(path) + file, fast := x.addFile(filename, goFile) if file == nil { - return + return // addFile failed } - // we've got a file to index - x.current = x.fset.File(file.Pos()) // file.Pos is in the current file - dir, _ := pathutil.Split(path) - pak := Pak{dir, file.Name.Name} - x.file = &File{path, pak} - ast.Walk(x, file) + if fast != nil { + // we've got a Go file to index + x.current = file + dir, _ := path.Split(filename) + pak := Pak{dir, fast.Name.Name} + x.file = &File{filename, pak} + ast.Walk(x, fast) + } // update statistics - // (count real file size as opposed to using the padded x.sources.Len()) - x.stats.Bytes += x.current.Size() + x.stats.Bytes += file.Size() x.stats.Files++ - x.stats.Lines += x.current.LineCount() + x.stats.Lines += file.LineCount() } @@ -817,7 +889,8 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) { func isIdentifier(s string) bool { var S scanner.Scanner - S.Init(token.NewFileSet(), "", []byte(s), nil, 0) + fset := token.NewFileSet() + S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0) if _, tok, _ := S.Scan(); tok == token.IDENT { _, tok, _ := S.Scan() return tok == token.EOF |