diff options
Diffstat (limited to 'src/cmd/godoc/index.go')
-rw-r--r-- | src/cmd/godoc/index.go | 403 |
1 files changed, 252 insertions, 151 deletions
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index 9b4f31514..1bef79693 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -7,7 +7,7 @@ // // Algorithm for identifier index: // - traverse all .go files of the file tree specified by root -// - for each word (identifier) encountered, collect all occurrences (spots) +// - for each identifier (word) encountered, collect all occurrences (spots) // into a list; this produces a list of spots for each word // - reduce the lists: from a list of spots to a list of FileRuns, // and from a list of FileRuns into a list of PakRuns @@ -38,62 +38,80 @@ package main import ( + "bufio" "bytes" - "container/vector" + "encoding/gob" + "errors" "go/ast" "go/parser" "go/token" - "go/scanner" "index/suffixarray" + "io" "os" - "path/filepath" + pathpkg "path" "regexp" "sort" "strings" + "time" + "unicode" ) // ---------------------------------------------------------------------------- +// InterfaceSlice is a helper type for sorting interface +// slices according to some slice-specific sort criteria. + +type Comparer func(x, y interface{}) bool + +type InterfaceSlice struct { + slice []interface{} + less Comparer +} + +func (p *InterfaceSlice) Len() int { return len(p.slice) } +func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } +func (p *InterfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } + +// ---------------------------------------------------------------------------- // RunList -// A RunList is a vector of entries that can be sorted according to some +// A RunList is a list of entries that can be sorted according to some // criteria. A RunList may be compressed by grouping "runs" of entries // which are equal (according to the sort criteria) into a new RunList of // runs. 
For instance, a RunList containing pairs (x, y) may be compressed // into a RunList containing pair runs (x, {y}) where each run consists of // a list of y's with the same x. -type RunList struct { - vector.Vector - less func(x, y interface{}) bool -} - -func (h *RunList) Less(i, j int) bool { return h.less(h.At(i), h.At(j)) } +type RunList []interface{} -func (h *RunList) sort(less func(x, y interface{}) bool) { - h.less = less - sort.Sort(h) +func (h RunList) sort(less Comparer) { + sort.Sort(&InterfaceSlice{h, less}) } // Compress entries which are the same according to a sort criteria // (specified by less) into "runs". -func (h *RunList) reduce(less func(x, y interface{}) bool, newRun func(h *RunList, i, j int) interface{}) *RunList { +func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList { + if len(h) == 0 { + return nil + } + // len(h) > 0 + // create runs of entries with equal values h.sort(less) // for each run, make a new run object and collect them in a new RunList var hh RunList - i := 0 - for j := 0; j < h.Len(); j++ { - if less(h.At(i), h.At(j)) { - hh.Push(newRun(h, i, j)) - i = j // start a new run + i, x := 0, h[0] + for j, y := range h { + if less(x, y) { + hh = append(hh, newRun(h[i:j])) + i, x = j, h[j] // start a new run } } // add final run, if any - if i < h.Len() { - hh.Push(newRun(h, i, h.Len())) + if i < len(h) { + hh = append(hh, newRun(h[i:])) } - return &hh + return hh } // ---------------------------------------------------------------------------- @@ -164,30 +182,25 @@ func (x SpotInfo) IsIndex() bool { return x&1 != 0 } const removeDuplicates = true // A KindRun is a run of SpotInfos of the same kind in a given file. -type KindRun struct { - Kind SpotKind - Infos []SpotInfo -} +// The kind (3 bits) is stored in each SpotInfo element; to find the +// kind of a KindRun, look at any of its elements. +type KindRun []SpotInfo // KindRuns are sorted by line number or index. 
Since the isIndex bit // is always the same for all infos in one list we can compare lori's. -func (f *KindRun) Len() int { return len(f.Infos) } -func (f *KindRun) Less(i, j int) bool { return f.Infos[i].Lori() < f.Infos[j].Lori() } -func (f *KindRun) Swap(i, j int) { f.Infos[i], f.Infos[j] = f.Infos[j], f.Infos[i] } +func (k KindRun) Len() int { return len(k) } +func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } +func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } // FileRun contents are sorted by Kind for the reduction into KindRuns. func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } -// newKindRun allocates a new KindRun from the SpotInfo run [i, j) in h. -func newKindRun(h *RunList, i, j int) interface{} { - kind := h.At(i).(SpotInfo).Kind() - infos := make([]SpotInfo, j-i) - k := 0 - for ; i < j; i++ { - infos[k] = h.At(i).(SpotInfo) - k++ +// newKindRun allocates a new KindRun from the SpotInfo run h. +func newKindRun(h RunList) interface{} { + run := make(KindRun, len(h)) + for i, x := range h { + run[i] = x.(SpotInfo) } - run := &KindRun{kind, infos} // Spots were sorted by file and kind to create this run. // Within this run, sort them by line number or index. @@ -199,15 +212,15 @@ func newKindRun(h *RunList, i, j int) interface{} { // bit is always the same for all infos in one // list we can simply compare the entire info. k := 0 - var prev SpotInfo - for i, x := range infos { - if x != prev || i == 0 { - infos[k] = x + prev := SpotInfo(1<<32 - 1) // an unlikely value + for _, x := range run { + if x != prev { + run[k] = x k++ prev = x } } - run.Infos = infos[0:k] + run = run[0:k] } return run @@ -229,8 +242,13 @@ func (p *Pak) less(q *Pak) bool { // A File describes a Go file. 
type File struct { - Path string // complete file name - Pak Pak // the package to which the file belongs + Name string // directory-local file name + Pak *Pak // the package to which the file belongs +} + +// Path returns the file path of f. +func (f *File) Path() string { + return pathpkg.Join(f.Pak.Path, f.Name) } // A Spot describes a single occurrence of a word. @@ -242,30 +260,34 @@ type Spot struct { // A FileRun is a list of KindRuns belonging to the same file. type FileRun struct { File *File - Groups []*KindRun + Groups []KindRun } -// Spots are sorted by path for the reduction into FileRuns. -func lessSpot(x, y interface{}) bool { return x.(Spot).File.Path < y.(Spot).File.Path } +// Spots are sorted by file path for the reduction into FileRuns. +func lessSpot(x, y interface{}) bool { + fx := x.(Spot).File + fy := y.(Spot).File + // same as "return fx.Path() < fy.Path()" but w/o computing the file path first + px := fx.Pak.Path + py := fy.Pak.Path + return px < py || px == py && fx.Name < fy.Name +} -// newFileRun allocates a new FileRun from the Spot run [i, j) in h. -func newFileRun(h0 *RunList, i, j int) interface{} { - file := h0.At(i).(Spot).File +// newFileRun allocates a new FileRun from the Spot run h. +func newFileRun(h RunList) interface{} { + file := h[0].(Spot).File // reduce the list of Spots into a list of KindRuns - var h1 RunList - h1.Vector.Resize(j-i, 0) - k := 0 - for ; i < j; i++ { - h1.Set(k, h0.At(i).(Spot).Info) - k++ + h1 := make(RunList, len(h)) + for i, x := range h { + h1[i] = x.(Spot).Info } h2 := h1.reduce(lessKind, newKindRun) // create the FileRun - groups := make([]*KindRun, h2.Len()) - for i := 0; i < h2.Len(); i++ { - groups[i] = h2.At(i).(*KindRun) + groups := make([]KindRun, len(h2)) + for i, x := range h2 { + groups[i] = x.(KindRun) } return &FileRun{file, groups} } @@ -275,28 +297,26 @@ func newFileRun(h0 *RunList, i, j int) interface{} { // A PakRun describes a run of *FileRuns of a package. 
type PakRun struct { - Pak Pak + Pak *Pak Files []*FileRun } // Sorting support for files within a PakRun. func (p *PakRun) Len() int { return len(p.Files) } -func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Path < p.Files[j].File.Path } +func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } // FileRuns are sorted by package for the reduction into PakRuns. func lessFileRun(x, y interface{}) bool { - return x.(*FileRun).File.Pak.less(&y.(*FileRun).File.Pak) + return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) } -// newPakRun allocates a new PakRun from the *FileRun run [i, j) in h. -func newPakRun(h *RunList, i, j int) interface{} { - pak := h.At(i).(*FileRun).File.Pak - files := make([]*FileRun, j-i) - k := 0 - for ; i < j; i++ { - files[k] = h.At(i).(*FileRun) - k++ +// newPakRun allocates a new PakRun from the *FileRun run h. +func newPakRun(h RunList) interface{} { + pak := h[0].(*FileRun).File.Pak + files := make([]*FileRun, len(h)) + for i, x := range h { + files[i] = x.(*FileRun) } run := &PakRun{pak, files} sort.Sort(run) // files were sorted by package; sort them by file now @@ -310,9 +330,9 @@ func newPakRun(h *RunList, i, j int) interface{} { type HitList []*PakRun // PakRuns are sorted by package. 
-func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(&y.(*PakRun).Pak) } +func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } -func reduce(h0 *RunList) HitList { +func reduce(h0 RunList) HitList { // reduce a list of Spots into a list of FileRuns h1 := h0.reduce(lessSpot, newFileRun) // reduce a list of FileRuns into a list of PakRuns @@ -320,28 +340,20 @@ func reduce(h0 *RunList) HitList { // sort the list of PakRuns by package h2.sort(lessPakRun) // create a HitList - h := make(HitList, h2.Len()) - for i := 0; i < h2.Len(); i++ { - h[i] = h2.At(i).(*PakRun) + h := make(HitList, len(h2)) + for i, p := range h2 { + h[i] = p.(*PakRun) } return h } +// filter returns a new HitList created by filtering +// all PakRuns from h that have a matching pakname. func (h HitList) filter(pakname string) HitList { - // determine number of matching packages (most of the time just one) - n := 0 - for _, p := range h { - if p.Pak.Name == pakname { - n++ - } - } - // create filtered HitList - hh := make(HitList, n) - i := 0 + var hh HitList for _, p := range h { if p.Pak.Name == pakname { - hh[i] = p - i++ + hh = append(hh, p) } } return hh @@ -365,34 +377,27 @@ type AltWords struct { // wordPairs are sorted by their canonical spelling. func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } -// newAltWords allocates a new AltWords from the *wordPair run [i, j) in h. -func newAltWords(h *RunList, i, j int) interface{} { - canon := h.At(i).(*wordPair).canon - alts := make([]string, j-i) - k := 0 - for ; i < j; i++ { - alts[k] = h.At(i).(*wordPair).alt - k++ +// newAltWords allocates a new AltWords from the *wordPair run h. 
+func newAltWords(h RunList) interface{} { + canon := h[0].(*wordPair).canon + alts := make([]string, len(h)) + for i, x := range h { + alts[i] = x.(*wordPair).alt } return &AltWords{canon, alts} } func (a *AltWords) filter(s string) *AltWords { - if len(a.Alts) == 1 && a.Alts[0] == s { - // there are no different alternatives - return nil - } - - // make a new AltWords with the current spelling removed - alts := make([]string, len(a.Alts)) - i := 0 + var alts []string for _, w := range a.Alts { if w != s { - alts[i] = w - i++ + alts = append(alts, w) } } - return &AltWords{a.Canon, alts[0:i]} + if len(alts) > 0 { + return &AltWords{a.Canon, alts} + } + return nil } // ---------------------------------------------------------------------------- @@ -423,17 +428,32 @@ type Statistics struct { type Indexer struct { fset *token.FileSet // file set for all indexed files sources bytes.Buffer // concatenated sources + packages map[string]*Pak // map of canonicalized *Paks words map[string]*IndexResult // RunLists of Spots - snippets vector.Vector // vector of *Snippets, indexed by snippet indices + snippets []*Snippet // indices are stored in SpotInfos current *token.File // last file added to file set file *File // AST for current file decl ast.Decl // AST for current decl stats Statistics } +func (x *Indexer) lookupPackage(path, name string) *Pak { + // In the source directory tree, more than one package may + // live in the same directory. For the packages map, construct + // a key that includes both the directory path and the package + // name. 
+ key := path + ":" + name + pak := x.packages[key] + if pak == nil { + pak = &Pak{path, name} + x.packages[key] = pak + } + return pak +} + func (x *Indexer) addSnippet(s *Snippet) int { - index := x.snippets.Len() - x.snippets.Push(s) + index := len(x.snippets) + x.snippets = append(x.snippets, s) return index } @@ -454,12 +474,12 @@ func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { if kind == Use || x.decl == nil { // not a declaration or no snippet required info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) - lists.Others.Push(Spot{x.file, info}) + lists.Others = append(lists.Others, Spot{x.file, info}) } else { // a declaration with snippet index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) info := makeSpotInfo(kind, index, true) - lists.Decls.Push(Spot{x.file, info}) + lists.Decls = append(lists.Decls, Spot{x.file, info}) } x.stats.Spots++ @@ -675,7 +695,7 @@ var whitelisted = map[string]bool{ // of "permitted" files for indexing. The filename must // be the directory-local name of the file. 
func isWhitelisted(filename string) bool { - key := filepath.Ext(filename) + key := pathpkg.Ext(filename) if key == "" { // file has no extension - use entire filename key = filename @@ -683,12 +703,12 @@ func isWhitelisted(filename string) bool { return whitelisted[key] } -func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) { - if !f.IsRegular() { +func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { + if f.IsDir() { return } - filename := filepath.Join(dirname, f.Name()) + filename := pathpkg.Join(dirname, f.Name()) goFile := false switch { @@ -713,9 +733,8 @@ func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) { if fast != nil { // we've got a Go file to index x.current = file - dir, _ := filepath.Split(filename) - pak := Pak{dir, fast.Name.Name} - x.file = &File{filename, pak} + pak := x.lookupPackage(dirname, fast.Name.Name) + x.file = &File{f.Name(), pak} ast.Walk(x, fast) } @@ -747,12 +766,15 @@ func canonical(w string) string { return strings.ToLower(w) } // NewIndex creates a new index for the .go files // in the directories given by dirnames. 
// -func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index { +func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { var x Indexer + th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time // initialize Indexer + // (use some reasonably sized maps to start) x.fset = token.NewFileSet() - x.words = make(map[string]*IndexResult) + x.packages = make(map[string]*Pak, 256) + x.words = make(map[string]*IndexResult, 8192) // index all files in the directories given by dirnames for dirname := range dirnames { @@ -761,9 +783,10 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index { continue // ignore this directory } for _, f := range list { - if !f.IsDirectory() { + if !f.IsDir() { x.visitFile(dirname, f, fulltextIndex) } + th.Throttle() } } @@ -782,13 +805,14 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index { words := make(map[string]*LookupResult) var wlist RunList for w, h := range x.words { - decls := reduce(&h.Decls) - others := reduce(&h.Others) + decls := reduce(h.Decls) + others := reduce(h.Others) words[w] = &LookupResult{ Decls: decls, Others: others, } - wlist.Push(&wordPair{canonical(w), w}) + wlist = append(wlist, &wordPair{canonical(w), w}) + th.Throttle() } x.stats.Words = len(words) @@ -798,24 +822,92 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index { // convert alist into a map of alternative spellings alts := make(map[string]*AltWords) - for i := 0; i < alist.Len(); i++ { - a := alist.At(i).(*AltWords) + for i := 0; i < len(alist); i++ { + a := alist[i].(*AltWords) alts[a.Canon] = a } - // convert snippet vector into a list - snippets := make([]*Snippet, x.snippets.Len()) - for i := 0; i < x.snippets.Len(); i++ { - snippets[i] = x.snippets.At(i).(*Snippet) - } - // create text index var suffixes *suffixarray.Index if fulltextIndex { suffixes = suffixarray.New(x.sources.Bytes()) } - return &Index{x.fset, suffixes, words, alts, snippets, 
x.stats} + return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats} +} + +type fileIndex struct { + Words map[string]*LookupResult + Alts map[string]*AltWords + Snippets []*Snippet + Fulltext bool +} + +func (x *fileIndex) Write(w io.Writer) error { + return gob.NewEncoder(w).Encode(x) +} + +func (x *fileIndex) Read(r io.Reader) error { + return gob.NewDecoder(r).Decode(x) +} + +// Write writes the index x to w. +func (x *Index) Write(w io.Writer) error { + fulltext := false + if x.suffixes != nil { + fulltext = true + } + fx := fileIndex{ + x.words, + x.alts, + x.snippets, + fulltext, + } + if err := fx.Write(w); err != nil { + return err + } + if fulltext { + encode := func(x interface{}) error { + return gob.NewEncoder(w).Encode(x) + } + if err := x.fset.Write(encode); err != nil { + return err + } + if err := x.suffixes.Write(w); err != nil { + return err + } + } + return nil +} + +// Read reads the index from r into x; x must not be nil. +// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. +func (x *Index) Read(r io.Reader) error { + // We use the ability to read bytes as a plausible surrogate for buffering. + if _, ok := r.(io.ByteReader); !ok { + r = bufio.NewReader(r) + } + var fx fileIndex + if err := fx.Read(r); err != nil { + return err + } + x.words = fx.Words + x.alts = fx.Alts + x.snippets = fx.Snippets + if fx.Fulltext { + x.fset = token.NewFileSet() + decode := func(x interface{}) error { + return gob.NewDecoder(r).Decode(x) + } + if err := x.fset.Read(decode); err != nil { + return err + } + x.suffixes = new(suffixarray.Index) + if err := x.suffixes.Read(r); err != nil { + return err + } + } + return nil } // Stats() returns index statistics. 
@@ -823,7 +915,7 @@ func (x *Index) Stats() Statistics { return x.stats } -func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) { +func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { match = x.words[w] alt = x.alts[canonical(w)] // remove current spelling from alternatives @@ -835,47 +927,56 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) { return } +// isIdentifier reports whether s is a Go identifier. func isIdentifier(s string) bool { - var S scanner.Scanner - fset := token.NewFileSet() - S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0) - if _, tok, _ := S.Scan(); tok == token.IDENT { - _, tok, _ := S.Scan() - return tok == token.EOF + for i, ch := range s { + if unicode.IsLetter(ch) || ch == ' ' || i > 0 && unicode.IsDigit(ch) { + continue + } + return false } - return false + return len(s) > 0 } // For a given query, which is either a single identifier or a qualified -// identifier, Lookup returns a LookupResult, and a list of alternative -// spellings, if any. If the query syntax is wrong, an error is reported. -func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, err os.Error) { +// identifier, Lookup returns a list of packages, a LookupResult, and a +// list of alternative spellings, if any. Any and all results may be nil. +// If the query syntax is wrong, an error is reported. 
+func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) { ss := strings.Split(query, ".") // check query syntax for _, s := range ss { if !isIdentifier(s) { - err = os.NewError("all query parts must be identifiers") + err = errors.New("all query parts must be identifiers") return } } + // handle simple and qualified identifiers switch len(ss) { case 1: - match, alt = x.LookupWord(ss[0]) + ident := ss[0] + match, alt = x.lookupWord(ident) + if match != nil { + // found a match - filter packages with same name + // for the list of packages called ident, if any + paks = match.Others.filter(ident) + } case 2: - pakname := ss[0] - match, alt = x.LookupWord(ss[1]) + pakname, ident := ss[0], ss[1] + match, alt = x.lookupWord(ident) if match != nil { // found a match - filter by package name + // (no paks - package names are not qualified) decls := match.Decls.filter(pakname) others := match.Others.filter(pakname) match = &LookupResult{decls, others} } default: - err = os.NewError("query is not a (qualified) identifier") + err = errors.New("query is not a (qualified) identifier") } return |