summaryrefslogtreecommitdiff
path: root/src/cmd/godoc/index.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/godoc/index.go')
-rw-r--r--src/cmd/godoc/index.go403
1 files changed, 252 insertions, 151 deletions
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go
index 9b4f31514..1bef79693 100644
--- a/src/cmd/godoc/index.go
+++ b/src/cmd/godoc/index.go
@@ -7,7 +7,7 @@
//
// Algorithm for identifier index:
// - traverse all .go files of the file tree specified by root
-// - for each word (identifier) encountered, collect all occurrences (spots)
+// - for each identifier (word) encountered, collect all occurrences (spots)
// into a list; this produces a list of spots for each word
// - reduce the lists: from a list of spots to a list of FileRuns,
// and from a list of FileRuns into a list of PakRuns
@@ -38,62 +38,80 @@
package main
import (
+ "bufio"
"bytes"
- "container/vector"
+ "encoding/gob"
+ "errors"
"go/ast"
"go/parser"
"go/token"
- "go/scanner"
"index/suffixarray"
+ "io"
"os"
- "path/filepath"
+ pathpkg "path"
"regexp"
"sort"
"strings"
+ "time"
+ "unicode"
)
// ----------------------------------------------------------------------------
+// InterfaceSlice is a helper type for sorting interface
+// slices according to some slice-specific sort criteria.
+
+type Comparer func(x, y interface{}) bool
+
+type InterfaceSlice struct {
+ slice []interface{}
+ less Comparer
+}
+
+func (p *InterfaceSlice) Len() int { return len(p.slice) }
+func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
+func (p *InterfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
+
+// ----------------------------------------------------------------------------
// RunList
-// A RunList is a vector of entries that can be sorted according to some
+// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort critera) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
-type RunList struct {
- vector.Vector
- less func(x, y interface{}) bool
-}
-
-func (h *RunList) Less(i, j int) bool { return h.less(h.At(i), h.At(j)) }
+type RunList []interface{}
-func (h *RunList) sort(less func(x, y interface{}) bool) {
- h.less = less
- sort.Sort(h)
+func (h RunList) sort(less Comparer) {
+ sort.Sort(&InterfaceSlice{h, less})
}
// Compress entries which are the same according to a sort criteria
// (specified by less) into "runs".
-func (h *RunList) reduce(less func(x, y interface{}) bool, newRun func(h *RunList, i, j int) interface{}) *RunList {
+func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList {
+ if len(h) == 0 {
+ return nil
+ }
+ // len(h) > 0
+
// create runs of entries with equal values
h.sort(less)
// for each run, make a new run object and collect them in a new RunList
var hh RunList
- i := 0
- for j := 0; j < h.Len(); j++ {
- if less(h.At(i), h.At(j)) {
- hh.Push(newRun(h, i, j))
- i = j // start a new run
+ i, x := 0, h[0]
+ for j, y := range h {
+ if less(x, y) {
+ hh = append(hh, newRun(h[i:j]))
+ i, x = j, h[j] // start a new run
}
}
// add final run, if any
- if i < h.Len() {
- hh.Push(newRun(h, i, h.Len()))
+ if i < len(h) {
+ hh = append(hh, newRun(h[i:]))
}
- return &hh
+ return hh
}
// ----------------------------------------------------------------------------
@@ -164,30 +182,25 @@ func (x SpotInfo) IsIndex() bool { return x&1 != 0 }
const removeDuplicates = true
// A KindRun is a run of SpotInfos of the same kind in a given file.
-type KindRun struct {
- Kind SpotKind
- Infos []SpotInfo
-}
+// The kind (3 bits) is stored in each SpotInfo element; to find the
+// kind of a KindRun, look at any of it's elements.
+type KindRun []SpotInfo
// KindRuns are sorted by line number or index. Since the isIndex bit
// is always the same for all infos in one list we can compare lori's.
-func (f *KindRun) Len() int { return len(f.Infos) }
-func (f *KindRun) Less(i, j int) bool { return f.Infos[i].Lori() < f.Infos[j].Lori() }
-func (f *KindRun) Swap(i, j int) { f.Infos[i], f.Infos[j] = f.Infos[j], f.Infos[i] }
+func (k KindRun) Len() int { return len(k) }
+func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
+func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
// FileRun contents are sorted by Kind for the reduction into KindRuns.
func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
-// newKindRun allocates a new KindRun from the SpotInfo run [i, j) in h.
-func newKindRun(h *RunList, i, j int) interface{} {
- kind := h.At(i).(SpotInfo).Kind()
- infos := make([]SpotInfo, j-i)
- k := 0
- for ; i < j; i++ {
- infos[k] = h.At(i).(SpotInfo)
- k++
+// newKindRun allocates a new KindRun from the SpotInfo run h.
+func newKindRun(h RunList) interface{} {
+ run := make(KindRun, len(h))
+ for i, x := range h {
+ run[i] = x.(SpotInfo)
}
- run := &KindRun{kind, infos}
// Spots were sorted by file and kind to create this run.
// Within this run, sort them by line number or index.
@@ -199,15 +212,15 @@ func newKindRun(h *RunList, i, j int) interface{} {
// bit is always the same for all infos in one
// list we can simply compare the entire info.
k := 0
- var prev SpotInfo
- for i, x := range infos {
- if x != prev || i == 0 {
- infos[k] = x
+ prev := SpotInfo(1<<32 - 1) // an unlikely value
+ for _, x := range run {
+ if x != prev {
+ run[k] = x
k++
prev = x
}
}
- run.Infos = infos[0:k]
+ run = run[0:k]
}
return run
@@ -229,8 +242,13 @@ func (p *Pak) less(q *Pak) bool {
// A File describes a Go file.
type File struct {
- Path string // complete file name
- Pak Pak // the package to which the file belongs
+ Name string // directory-local file name
+ Pak *Pak // the package to which the file belongs
+}
+
+// Path returns the file path of f.
+func (f *File) Path() string {
+ return pathpkg.Join(f.Pak.Path, f.Name)
}
// A Spot describes a single occurrence of a word.
@@ -242,30 +260,34 @@ type Spot struct {
// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
File *File
- Groups []*KindRun
+ Groups []KindRun
}
-// Spots are sorted by path for the reduction into FileRuns.
-func lessSpot(x, y interface{}) bool { return x.(Spot).File.Path < y.(Spot).File.Path }
+// Spots are sorted by file path for the reduction into FileRuns.
+func lessSpot(x, y interface{}) bool {
+ fx := x.(Spot).File
+ fy := y.(Spot).File
+ // same as "return fx.Path() < fy.Path()" but w/o computing the file path first
+ px := fx.Pak.Path
+ py := fy.Pak.Path
+ return px < py || px == py && fx.Name < fy.Name
+}
-// newFileRun allocates a new FileRun from the Spot run [i, j) in h.
-func newFileRun(h0 *RunList, i, j int) interface{} {
- file := h0.At(i).(Spot).File
+// newFileRun allocates a new FileRun from the Spot run h.
+func newFileRun(h RunList) interface{} {
+ file := h[0].(Spot).File
// reduce the list of Spots into a list of KindRuns
- var h1 RunList
- h1.Vector.Resize(j-i, 0)
- k := 0
- for ; i < j; i++ {
- h1.Set(k, h0.At(i).(Spot).Info)
- k++
+ h1 := make(RunList, len(h))
+ for i, x := range h {
+ h1[i] = x.(Spot).Info
}
h2 := h1.reduce(lessKind, newKindRun)
// create the FileRun
- groups := make([]*KindRun, h2.Len())
- for i := 0; i < h2.Len(); i++ {
- groups[i] = h2.At(i).(*KindRun)
+ groups := make([]KindRun, len(h2))
+ for i, x := range h2 {
+ groups[i] = x.(KindRun)
}
return &FileRun{file, groups}
}
@@ -275,28 +297,26 @@ func newFileRun(h0 *RunList, i, j int) interface{} {
// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
- Pak Pak
+ Pak *Pak
Files []*FileRun
}
// Sorting support for files within a PakRun.
func (p *PakRun) Len() int { return len(p.Files) }
-func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Path < p.Files[j].File.Path }
+func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
// FileRuns are sorted by package for the reduction into PakRuns.
func lessFileRun(x, y interface{}) bool {
- return x.(*FileRun).File.Pak.less(&y.(*FileRun).File.Pak)
+ return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
}
-// newPakRun allocates a new PakRun from the *FileRun run [i, j) in h.
-func newPakRun(h *RunList, i, j int) interface{} {
- pak := h.At(i).(*FileRun).File.Pak
- files := make([]*FileRun, j-i)
- k := 0
- for ; i < j; i++ {
- files[k] = h.At(i).(*FileRun)
- k++
+// newPakRun allocates a new PakRun from the *FileRun run h.
+func newPakRun(h RunList) interface{} {
+ pak := h[0].(*FileRun).File.Pak
+ files := make([]*FileRun, len(h))
+ for i, x := range h {
+ files[i] = x.(*FileRun)
}
run := &PakRun{pak, files}
sort.Sort(run) // files were sorted by package; sort them by file now
@@ -310,9 +330,9 @@ func newPakRun(h *RunList, i, j int) interface{} {
type HitList []*PakRun
// PakRuns are sorted by package.
-func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(&y.(*PakRun).Pak) }
+func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
-func reduce(h0 *RunList) HitList {
+func reduce(h0 RunList) HitList {
// reduce a list of Spots into a list of FileRuns
h1 := h0.reduce(lessSpot, newFileRun)
// reduce a list of FileRuns into a list of PakRuns
@@ -320,28 +340,20 @@ func reduce(h0 *RunList) HitList {
// sort the list of PakRuns by package
h2.sort(lessPakRun)
// create a HitList
- h := make(HitList, h2.Len())
- for i := 0; i < h2.Len(); i++ {
- h[i] = h2.At(i).(*PakRun)
+ h := make(HitList, len(h2))
+ for i, p := range h2 {
+ h[i] = p.(*PakRun)
}
return h
}
+// filter returns a new HitList created by filtering
+// all PakRuns from h that have a matching pakname.
func (h HitList) filter(pakname string) HitList {
- // determine number of matching packages (most of the time just one)
- n := 0
- for _, p := range h {
- if p.Pak.Name == pakname {
- n++
- }
- }
- // create filtered HitList
- hh := make(HitList, n)
- i := 0
+ var hh HitList
for _, p := range h {
if p.Pak.Name == pakname {
- hh[i] = p
- i++
+ hh = append(hh, p)
}
}
return hh
@@ -365,34 +377,27 @@ type AltWords struct {
// wordPairs are sorted by their canonical spelling.
func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
-// newAltWords allocates a new AltWords from the *wordPair run [i, j) in h.
-func newAltWords(h *RunList, i, j int) interface{} {
- canon := h.At(i).(*wordPair).canon
- alts := make([]string, j-i)
- k := 0
- for ; i < j; i++ {
- alts[k] = h.At(i).(*wordPair).alt
- k++
+// newAltWords allocates a new AltWords from the *wordPair run h.
+func newAltWords(h RunList) interface{} {
+ canon := h[0].(*wordPair).canon
+ alts := make([]string, len(h))
+ for i, x := range h {
+ alts[i] = x.(*wordPair).alt
}
return &AltWords{canon, alts}
}
func (a *AltWords) filter(s string) *AltWords {
- if len(a.Alts) == 1 && a.Alts[0] == s {
- // there are no different alternatives
- return nil
- }
-
- // make a new AltWords with the current spelling removed
- alts := make([]string, len(a.Alts))
- i := 0
+ var alts []string
for _, w := range a.Alts {
if w != s {
- alts[i] = w
- i++
+ alts = append(alts, w)
}
}
- return &AltWords{a.Canon, alts[0:i]}
+ if len(alts) > 0 {
+ return &AltWords{a.Canon, alts}
+ }
+ return nil
}
// ----------------------------------------------------------------------------
@@ -423,17 +428,32 @@ type Statistics struct {
type Indexer struct {
fset *token.FileSet // file set for all indexed files
sources bytes.Buffer // concatenated sources
+ packages map[string]*Pak // map of canonicalized *Paks
words map[string]*IndexResult // RunLists of Spots
- snippets vector.Vector // vector of *Snippets, indexed by snippet indices
+ snippets []*Snippet // indices are stored in SpotInfos
current *token.File // last file added to file set
file *File // AST for current file
decl ast.Decl // AST for current decl
stats Statistics
}
+func (x *Indexer) lookupPackage(path, name string) *Pak {
+ // In the source directory tree, more than one package may
+ // live in the same directory. For the packages map, construct
+ // a key that includes both the directory path and the package
+ // name.
+ key := path + ":" + name
+ pak := x.packages[key]
+ if pak == nil {
+ pak = &Pak{path, name}
+ x.packages[key] = pak
+ }
+ return pak
+}
+
func (x *Indexer) addSnippet(s *Snippet) int {
- index := x.snippets.Len()
- x.snippets.Push(s)
+ index := len(x.snippets)
+ x.snippets = append(x.snippets, s)
return index
}
@@ -454,12 +474,12 @@ func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
if kind == Use || x.decl == nil {
// not a declaration or no snippet required
info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
- lists.Others.Push(Spot{x.file, info})
+ lists.Others = append(lists.Others, Spot{x.file, info})
} else {
// a declaration with snippet
index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
info := makeSpotInfo(kind, index, true)
- lists.Decls.Push(Spot{x.file, info})
+ lists.Decls = append(lists.Decls, Spot{x.file, info})
}
x.stats.Spots++
@@ -675,7 +695,7 @@ var whitelisted = map[string]bool{
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file.
func isWhitelisted(filename string) bool {
- key := filepath.Ext(filename)
+ key := pathpkg.Ext(filename)
if key == "" {
// file has no extension - use entire filename
key = filename
@@ -683,12 +703,12 @@ func isWhitelisted(filename string) bool {
return whitelisted[key]
}
-func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) {
- if !f.IsRegular() {
+func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
+ if f.IsDir() {
return
}
- filename := filepath.Join(dirname, f.Name())
+ filename := pathpkg.Join(dirname, f.Name())
goFile := false
switch {
@@ -713,9 +733,8 @@ func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) {
if fast != nil {
// we've got a Go file to index
x.current = file
- dir, _ := filepath.Split(filename)
- pak := Pak{dir, fast.Name.Name}
- x.file = &File{filename, pak}
+ pak := x.lookupPackage(dirname, fast.Name.Name)
+ x.file = &File{f.Name(), pak}
ast.Walk(x, fast)
}
@@ -747,12 +766,15 @@ func canonical(w string) string { return strings.ToLower(w) }
// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
-func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
+func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
var x Indexer
+ th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time
// initialize Indexer
+ // (use some reasonably sized maps to start)
x.fset = token.NewFileSet()
- x.words = make(map[string]*IndexResult)
+ x.packages = make(map[string]*Pak, 256)
+ x.words = make(map[string]*IndexResult, 8192)
// index all files in the directories given by dirnames
for dirname := range dirnames {
@@ -761,9 +783,10 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
continue // ignore this directory
}
for _, f := range list {
- if !f.IsDirectory() {
+ if !f.IsDir() {
x.visitFile(dirname, f, fulltextIndex)
}
+ th.Throttle()
}
}
@@ -782,13 +805,14 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
words := make(map[string]*LookupResult)
var wlist RunList
for w, h := range x.words {
- decls := reduce(&h.Decls)
- others := reduce(&h.Others)
+ decls := reduce(h.Decls)
+ others := reduce(h.Others)
words[w] = &LookupResult{
Decls: decls,
Others: others,
}
- wlist.Push(&wordPair{canonical(w), w})
+ wlist = append(wlist, &wordPair{canonical(w), w})
+ th.Throttle()
}
x.stats.Words = len(words)
@@ -798,24 +822,92 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
// convert alist into a map of alternative spellings
alts := make(map[string]*AltWords)
- for i := 0; i < alist.Len(); i++ {
- a := alist.At(i).(*AltWords)
+ for i := 0; i < len(alist); i++ {
+ a := alist[i].(*AltWords)
alts[a.Canon] = a
}
- // convert snippet vector into a list
- snippets := make([]*Snippet, x.snippets.Len())
- for i := 0; i < x.snippets.Len(); i++ {
- snippets[i] = x.snippets.At(i).(*Snippet)
- }
-
// create text index
var suffixes *suffixarray.Index
if fulltextIndex {
suffixes = suffixarray.New(x.sources.Bytes())
}
- return &Index{x.fset, suffixes, words, alts, snippets, x.stats}
+ return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
+}
+
+type fileIndex struct {
+ Words map[string]*LookupResult
+ Alts map[string]*AltWords
+ Snippets []*Snippet
+ Fulltext bool
+}
+
+func (x *fileIndex) Write(w io.Writer) error {
+ return gob.NewEncoder(w).Encode(x)
+}
+
+func (x *fileIndex) Read(r io.Reader) error {
+ return gob.NewDecoder(r).Decode(x)
+}
+
+// Write writes the index x to w.
+func (x *Index) Write(w io.Writer) error {
+ fulltext := false
+ if x.suffixes != nil {
+ fulltext = true
+ }
+ fx := fileIndex{
+ x.words,
+ x.alts,
+ x.snippets,
+ fulltext,
+ }
+ if err := fx.Write(w); err != nil {
+ return err
+ }
+ if fulltext {
+ encode := func(x interface{}) error {
+ return gob.NewEncoder(w).Encode(x)
+ }
+ if err := x.fset.Write(encode); err != nil {
+ return err
+ }
+ if err := x.suffixes.Write(w); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Read reads the index from r into x; x must not be nil.
+// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
+func (x *Index) Read(r io.Reader) error {
+ // We use the ability to read bytes as a plausible surrogate for buffering.
+ if _, ok := r.(io.ByteReader); !ok {
+ r = bufio.NewReader(r)
+ }
+ var fx fileIndex
+ if err := fx.Read(r); err != nil {
+ return err
+ }
+ x.words = fx.Words
+ x.alts = fx.Alts
+ x.snippets = fx.Snippets
+ if fx.Fulltext {
+ x.fset = token.NewFileSet()
+ decode := func(x interface{}) error {
+ return gob.NewDecoder(r).Decode(x)
+ }
+ if err := x.fset.Read(decode); err != nil {
+ return err
+ }
+ x.suffixes = new(suffixarray.Index)
+ if err := x.suffixes.Read(r); err != nil {
+ return err
+ }
+ }
+ return nil
}
// Stats() returns index statistics.
@@ -823,7 +915,7 @@ func (x *Index) Stats() Statistics {
return x.stats
}
-func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) {
+func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
match = x.words[w]
alt = x.alts[canonical(w)]
// remove current spelling from alternatives
@@ -835,47 +927,56 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) {
return
}
+// isIdentifier reports whether s is a Go identifier.
func isIdentifier(s string) bool {
- var S scanner.Scanner
- fset := token.NewFileSet()
- S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0)
- if _, tok, _ := S.Scan(); tok == token.IDENT {
- _, tok, _ := S.Scan()
- return tok == token.EOF
+ for i, ch := range s {
+ if unicode.IsLetter(ch) || ch == ' ' || i > 0 && unicode.IsDigit(ch) {
+ continue
+ }
+ return false
}
- return false
+ return len(s) > 0
}
// For a given query, which is either a single identifier or a qualified
-// identifier, Lookup returns a LookupResult, and a list of alternative
-// spellings, if any. If the query syntax is wrong, an error is reported.
-func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, err os.Error) {
+// identifier, Lookup returns a list of packages, a LookupResult, and a
+// list of alternative spellings, if any. Any and all results may be nil.
+// If the query syntax is wrong, an error is reported.
+func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) {
ss := strings.Split(query, ".")
// check query syntax
for _, s := range ss {
if !isIdentifier(s) {
- err = os.NewError("all query parts must be identifiers")
+ err = errors.New("all query parts must be identifiers")
return
}
}
+ // handle simple and qualified identifiers
switch len(ss) {
case 1:
- match, alt = x.LookupWord(ss[0])
+ ident := ss[0]
+ match, alt = x.lookupWord(ident)
+ if match != nil {
+ // found a match - filter packages with same name
+ // for the list of packages called ident, if any
+ paks = match.Others.filter(ident)
+ }
case 2:
- pakname := ss[0]
- match, alt = x.LookupWord(ss[1])
+ pakname, ident := ss[0], ss[1]
+ match, alt = x.lookupWord(ident)
if match != nil {
// found a match - filter by package name
+ // (no paks - package names are not qualified)
decls := match.Decls.filter(pakname)
others := match.Others.filter(pakname)
match = &LookupResult{decls, others}
}
default:
- err = os.NewError("query is not a (qualified) identifier")
+ err = errors.New("query is not a (qualified) identifier")
}
return