diff options
Diffstat (limited to 'src/cmd/godoc/index.go')
| -rw-r--r-- | src/cmd/godoc/index.go | 1073 |
1 files changed, 0 insertions, 1073 deletions
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go deleted file mode 100644 index 8198fca0d..000000000 --- a/src/cmd/godoc/index.go +++ /dev/null @@ -1,1073 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file contains the infrastructure to create an -// identifier and full-text index for a set of Go files. -// -// Algorithm for identifier index: -// - traverse all .go files of the file tree specified by root -// - for each identifier (word) encountered, collect all occurrences (spots) -// into a list; this produces a list of spots for each word -// - reduce the lists: from a list of spots to a list of FileRuns, -// and from a list of FileRuns into a list of PakRuns -// - make a HitList from the PakRuns -// -// Details: -// - keep two lists per word: one containing package-level declarations -// that have snippets, and one containing all other spots -// - keep the snippets in a separate table indexed by snippet index -// and store the snippet index in place of the line number in a SpotInfo -// (the line number for spots with snippets is stored in the snippet) -// - at the end, create lists of alternative spellings for a given -// word -// -// Algorithm for full text index: -// - concatenate all source code in a byte buffer (in memory) -// - add the files to a file set in lockstep as they are added to the byte -// buffer such that a byte buffer offset corresponds to the Pos value for -// that file location -// - create a suffix array from the concatenated sources -// -// String lookup in full text index: -// - use the suffix array to lookup a string's offsets - the offsets -// correspond to the Pos values relative to the file set -// - translate the Pos values back into file and line information and -// sort the result - -package main - -import ( - "bufio" - "bytes" - "encoding/gob" - "errors" - "go/ast" - "go/parser" - "go/token" - "index/suffixarray" - "io" - "os" - pathpkg "path" - "regexp" - "sort" - "strings" - "time" - "unicode" -) - -// ---------------------------------------------------------------------------- -// InterfaceSlice is a helper type for sorting interface -// slices according to some slice-specific sort criteria. - -type Comparer func(x, y interface{}) bool - -type InterfaceSlice struct { - slice []interface{} - less Comparer -} - -func (p *InterfaceSlice) Len() int { return len(p.slice) } -func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } -func (p *InterfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } - -// ---------------------------------------------------------------------------- -// RunList - -// A RunList is a list of entries that can be sorted according to some -// criteria. A RunList may be compressed by grouping "runs" of entries -// which are equal (according to the sort critera) into a new RunList of -// runs. For instance, a RunList containing pairs (x, y) may be compressed -// into a RunList containing pair runs (x, {y}) where each run consists of -// a list of y's with the same x. -type RunList []interface{} - -func (h RunList) sort(less Comparer) { - sort.Sort(&InterfaceSlice{h, less}) -} - -// Compress entries which are the same according to a sort criteria -// (specified by less) into "runs". -func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList { - if len(h) == 0 { - return nil - } - // len(h) > 0 - - // create runs of entries with equal values - h.sort(less) - - // for each run, make a new run object and collect them in a new RunList - var hh RunList - i, x := 0, h[0] - for j, y := range h { - if less(x, y) { - hh = append(hh, newRun(h[i:j])) - i, x = j, h[j] // start a new run - } - } - // add final run, if any - if i < len(h) { - hh = append(hh, newRun(h[i:])) - } - - return hh -} - -// ---------------------------------------------------------------------------- -// SpotInfo - -// A SpotInfo value describes a particular identifier spot in a given file; -// It encodes three values: the SpotKind (declaration or use), a line or -// snippet index "lori", and whether it's a line or index. -// -// The following encoding is used: -// -// bits 32 4 1 0 -// value [lori|kind|isIndex] -// -type SpotInfo uint32 - -// SpotKind describes whether an identifier is declared (and what kind of -// declaration) or used. -type SpotKind uint32 - -const ( - PackageClause SpotKind = iota - ImportDecl - ConstDecl - TypeDecl - VarDecl - FuncDecl - MethodDecl - Use - nKinds -) - -func init() { - // sanity check: if nKinds is too large, the SpotInfo - // accessor functions may need to be updated - if nKinds > 8 { - panic("internal error: nKinds > 8") - } -} - -// makeSpotInfo makes a SpotInfo. -func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo { - // encode lori: bits [4..32) - x := SpotInfo(lori) << 4 - if int(x>>4) != lori { - // lori value doesn't fit - since snippet indices are - // most certainly always smaller then 1<<28, this can - // only happen for line numbers; give it no line number (= 0) - x = 0 - } - // encode kind: bits [1..4) - x |= SpotInfo(kind) << 1 - // encode isIndex: bit 0 - if isIndex { - x |= 1 - } - return x -} - -func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) } -func (x SpotInfo) Lori() int { return int(x >> 4) } -func (x SpotInfo) IsIndex() bool { return x&1 != 0 } - -// ---------------------------------------------------------------------------- -// KindRun - -// Debugging support. Disable to see multiple entries per line. -const removeDuplicates = true - -// A KindRun is a run of SpotInfos of the same kind in a given file. -// The kind (3 bits) is stored in each SpotInfo element; to find the -// kind of a KindRun, look at any of it's elements. -type KindRun []SpotInfo - -// KindRuns are sorted by line number or index. Since the isIndex bit -// is always the same for all infos in one list we can compare lori's. -func (k KindRun) Len() int { return len(k) } -func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } -func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } - -// FileRun contents are sorted by Kind for the reduction into KindRuns. -func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } - -// newKindRun allocates a new KindRun from the SpotInfo run h. -func newKindRun(h RunList) interface{} { - run := make(KindRun, len(h)) - for i, x := range h { - run[i] = x.(SpotInfo) - } - - // Spots were sorted by file and kind to create this run. - // Within this run, sort them by line number or index. - sort.Sort(run) - - if removeDuplicates { - // Since both the lori and kind field must be - // same for duplicates, and since the isIndex - // bit is always the same for all infos in one - // list we can simply compare the entire info. - k := 0 - prev := SpotInfo(1<<32 - 1) // an unlikely value - for _, x := range run { - if x != prev { - run[k] = x - k++ - prev = x - } - } - run = run[0:k] - } - - return run -} - -// ---------------------------------------------------------------------------- -// FileRun - -// A Pak describes a Go package. -type Pak struct { - Path string // path of directory containing the package - Name string // package name as declared by package clause -} - -// Paks are sorted by name (primary key) and by import path (secondary key). -func (p *Pak) less(q *Pak) bool { - return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path -} - -// A File describes a Go file. -type File struct { - Name string // directory-local file name - Pak *Pak // the package to which the file belongs -} - -// Path returns the file path of f. -func (f *File) Path() string { - return pathpkg.Join(f.Pak.Path, f.Name) -} - -// A Spot describes a single occurrence of a word. -type Spot struct { - File *File - Info SpotInfo -} - -// A FileRun is a list of KindRuns belonging to the same file. -type FileRun struct { - File *File - Groups []KindRun -} - -// Spots are sorted by file path for the reduction into FileRuns. -func lessSpot(x, y interface{}) bool { - fx := x.(Spot).File - fy := y.(Spot).File - // same as "return fx.Path() < fy.Path()" but w/o computing the file path first - px := fx.Pak.Path - py := fy.Pak.Path - return px < py || px == py && fx.Name < fy.Name -} - -// newFileRun allocates a new FileRun from the Spot run h. -func newFileRun(h RunList) interface{} { - file := h[0].(Spot).File - - // reduce the list of Spots into a list of KindRuns - h1 := make(RunList, len(h)) - for i, x := range h { - h1[i] = x.(Spot).Info - } - h2 := h1.reduce(lessKind, newKindRun) - - // create the FileRun - groups := make([]KindRun, len(h2)) - for i, x := range h2 { - groups[i] = x.(KindRun) - } - return &FileRun{file, groups} -} - -// ---------------------------------------------------------------------------- -// PakRun - -// A PakRun describes a run of *FileRuns of a package. -type PakRun struct { - Pak *Pak - Files []*FileRun -} - -// Sorting support for files within a PakRun. -func (p *PakRun) Len() int { return len(p.Files) } -func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } -func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } - -// FileRuns are sorted by package for the reduction into PakRuns. -func lessFileRun(x, y interface{}) bool { - return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) -} - -// newPakRun allocates a new PakRun from the *FileRun run h. -func newPakRun(h RunList) interface{} { - pak := h[0].(*FileRun).File.Pak - files := make([]*FileRun, len(h)) - for i, x := range h { - files[i] = x.(*FileRun) - } - run := &PakRun{pak, files} - sort.Sort(run) // files were sorted by package; sort them by file now - return run -} - -// ---------------------------------------------------------------------------- -// HitList - -// A HitList describes a list of PakRuns. -type HitList []*PakRun - -// PakRuns are sorted by package. -func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } - -func reduce(h0 RunList) HitList { - // reduce a list of Spots into a list of FileRuns - h1 := h0.reduce(lessSpot, newFileRun) - // reduce a list of FileRuns into a list of PakRuns - h2 := h1.reduce(lessFileRun, newPakRun) - // sort the list of PakRuns by package - h2.sort(lessPakRun) - // create a HitList - h := make(HitList, len(h2)) - for i, p := range h2 { - h[i] = p.(*PakRun) - } - return h -} - -// filter returns a new HitList created by filtering -// all PakRuns from h that have a matching pakname. -func (h HitList) filter(pakname string) HitList { - var hh HitList - for _, p := range h { - if p.Pak.Name == pakname { - hh = append(hh, p) - } - } - return hh -} - -// ---------------------------------------------------------------------------- -// AltWords - -type wordPair struct { - canon string // canonical word spelling (all lowercase) - alt string // alternative spelling -} - -// An AltWords describes a list of alternative spellings for a -// canonical (all lowercase) spelling of a word. -type AltWords struct { - Canon string // canonical word spelling (all lowercase) - Alts []string // alternative spelling for the same word -} - -// wordPairs are sorted by their canonical spelling. -func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } - -// newAltWords allocates a new AltWords from the *wordPair run h. -func newAltWords(h RunList) interface{} { - canon := h[0].(*wordPair).canon - alts := make([]string, len(h)) - for i, x := range h { - alts[i] = x.(*wordPair).alt - } - return &AltWords{canon, alts} -} - -func (a *AltWords) filter(s string) *AltWords { - var alts []string - for _, w := range a.Alts { - if w != s { - alts = append(alts, w) - } - } - if len(alts) > 0 { - return &AltWords{a.Canon, alts} - } - return nil -} - -// ---------------------------------------------------------------------------- -// Indexer - -// Adjust these flags as seems best. -const includeMainPackages = true -const includeTestFiles = true - -type IndexResult struct { - Decls RunList // package-level declarations (with snippets) - Others RunList // all other occurrences -} - -// Statistics provides statistics information for an index. -type Statistics struct { - Bytes int // total size of indexed source files - Files int // number of indexed source files - Lines int // number of lines (all files) - Words int // number of different identifiers - Spots int // number of identifier occurrences -} - -// An Indexer maintains the data structures and provides the machinery -// for indexing .go files under a file tree. It implements the path.Visitor -// interface for walking file trees, and the ast.Visitor interface for -// walking Go ASTs. -type Indexer struct { - fset *token.FileSet // file set for all indexed files - sources bytes.Buffer // concatenated sources - packages map[string]*Pak // map of canonicalized *Paks - words map[string]*IndexResult // RunLists of Spots - snippets []*Snippet // indices are stored in SpotInfos - current *token.File // last file added to file set - file *File // AST for current file - decl ast.Decl // AST for current decl - stats Statistics -} - -func (x *Indexer) lookupPackage(path, name string) *Pak { - // In the source directory tree, more than one package may - // live in the same directory. For the packages map, construct - // a key that includes both the directory path and the package - // name. - key := path + ":" + name - pak := x.packages[key] - if pak == nil { - pak = &Pak{path, name} - x.packages[key] = pak - } - return pak -} - -func (x *Indexer) addSnippet(s *Snippet) int { - index := len(x.snippets) - x.snippets = append(x.snippets, s) - return index -} - -func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { - if id != nil { - lists, found := x.words[id.Name] - if !found { - lists = new(IndexResult) - x.words[id.Name] = lists - } - - if kind == Use || x.decl == nil { - // not a declaration or no snippet required - info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) - lists.Others = append(lists.Others, Spot{x.file, info}) - } else { - // a declaration with snippet - index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) - info := makeSpotInfo(kind, index, true) - lists.Decls = append(lists.Decls, Spot{x.file, info}) - } - - x.stats.Spots++ - } -} - -func (x *Indexer) visitFieldList(kind SpotKind, list *ast.FieldList) { - for _, f := range list.List { - x.decl = nil // no snippets for fields - for _, name := range f.Names { - x.visitIdent(kind, name) - } - ast.Walk(x, f.Type) - // ignore tag - not indexed at the moment - } -} - -func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { - switch n := spec.(type) { - case *ast.ImportSpec: - x.visitIdent(ImportDecl, n.Name) - // ignore path - not indexed at the moment - - case *ast.ValueSpec: - for _, n := range n.Names { - x.visitIdent(kind, n) - } - ast.Walk(x, n.Type) - for _, v := range n.Values { - ast.Walk(x, v) - } - - case *ast.TypeSpec: - x.visitIdent(TypeDecl, n.Name) - ast.Walk(x, n.Type) - } -} - -func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { - kind := VarDecl - if decl.Tok == token.CONST { - kind = ConstDecl - } - x.decl = decl - for _, s := range decl.Specs { - x.visitSpec(kind, s) - } -} - -func (x *Indexer) Visit(node ast.Node) ast.Visitor { - switch n := node.(type) { - case nil: - // nothing to do - - case *ast.Ident: - x.visitIdent(Use, n) - - case *ast.FieldList: - x.visitFieldList(VarDecl, n) - - case *ast.InterfaceType: - x.visitFieldList(MethodDecl, n.Methods) - - case *ast.DeclStmt: - // local declarations should only be *ast.GenDecls; - // ignore incorrect ASTs - if decl, ok := n.Decl.(*ast.GenDecl); ok { - x.decl = nil // no snippets for local declarations - x.visitGenDecl(decl) - } - - case *ast.GenDecl: - x.decl = n - x.visitGenDecl(n) - - case *ast.FuncDecl: - kind := FuncDecl - if n.Recv != nil { - kind = MethodDecl - ast.Walk(x, n.Recv) - } - x.decl = n - x.visitIdent(kind, n.Name) - ast.Walk(x, n.Type) - if n.Body != nil { - ast.Walk(x, n.Body) - } - - case *ast.File: - x.decl = nil - x.visitIdent(PackageClause, n.Name) - for _, d := range n.Decls { - ast.Walk(x, d) - } - - default: - return x - } - - return nil -} - -func pkgName(filename string) string { - // use a new file set each time in order to not pollute the indexer's - // file set (which must stay in sync with the concatenated source code) - file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly) - if err != nil || file == nil { - return "" - } - return file.Name.Name -} - -// addFile adds a file to the index if possible and returns the file set file -// and the file's AST if it was successfully parsed as a Go file. If addFile -// failed (that is, if the file was not added), it returns file == nil. -func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { - // open file - f, err := fs.Open(filename) - if err != nil { - return - } - defer f.Close() - - // The file set's base offset and x.sources size must be in lock-step; - // this permits the direct mapping of suffix array lookup results to - // to corresponding Pos values. - // - // When a file is added to the file set, its offset base increases by - // the size of the file + 1; and the initial base offset is 1. Add an - // extra byte to the sources here. - x.sources.WriteByte(0) - - // If the sources length doesn't match the file set base at this point - // the file set implementation changed or we have another error. - base := x.fset.Base() - if x.sources.Len() != base { - panic("internal error: file base incorrect") - } - - // append file contents (src) to x.sources - if _, err := x.sources.ReadFrom(f); err == nil { - src := x.sources.Bytes()[base:] - - if goFile { - // parse the file and in the process add it to the file set - if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { - file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file - return - } - // file has parse errors, and the AST may be incorrect - - // set lines information explicitly and index as ordinary - // text file (cannot fall through to the text case below - // because the file has already been added to the file set - // by the parser) - file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file - file.SetLinesForContent(src) - ast = nil - return - } - - if isText(src) { - // only add the file to the file set (for the full text index) - file = x.fset.AddFile(filename, x.fset.Base(), len(src)) - file.SetLinesForContent(src) - return - } - } - - // discard possibly added data - x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added - return -} - -// Design note: Using an explicit white list of permitted files for indexing -// makes sure that the important files are included and massively reduces the -// number of files to index. The advantage over a blacklist is that unexpected -// (non-blacklisted) files won't suddenly explode the index. - -// Files are whitelisted if they have a file name or extension -// present as key in whitelisted. -var whitelisted = map[string]bool{ - ".bash": true, - ".c": true, - ".css": true, - ".go": true, - ".goc": true, - ".h": true, - ".html": true, - ".js": true, - ".out": true, - ".py": true, - ".s": true, - ".sh": true, - ".txt": true, - ".xml": true, - "AUTHORS": true, - "CONTRIBUTORS": true, - "LICENSE": true, - "Makefile": true, - "PATENTS": true, - "README": true, -} - -// isWhitelisted returns true if a file is on the list -// of "permitted" files for indexing. The filename must -// be the directory-local name of the file. -func isWhitelisted(filename string) bool { - key := pathpkg.Ext(filename) - if key == "" { - // file has no extension - use entire filename - key = filename - } - return whitelisted[key] -} - -func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { - if f.IsDir() { - return - } - - filename := pathpkg.Join(dirname, f.Name()) - goFile := false - - switch { - case isGoFile(f): - if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { - return - } - if !includeMainPackages && pkgName(filename) == "main" { - return - } - goFile = true - - case !fulltextIndex || !isWhitelisted(f.Name()): - return - } - - file, fast := x.addFile(filename, goFile) - if file == nil { - return // addFile failed - } - - if fast != nil { - // we've got a Go file to index - x.current = file - pak := x.lookupPackage(dirname, fast.Name.Name) - x.file = &File{f.Name(), pak} - ast.Walk(x, fast) - } - - // update statistics - x.stats.Bytes += file.Size() - x.stats.Files++ - x.stats.Lines += file.LineCount() -} - -// ---------------------------------------------------------------------------- -// Index - -type LookupResult struct { - Decls HitList // package-level declarations (with snippets) - Others HitList // all other occurrences -} - -type Index struct { - fset *token.FileSet // file set used during indexing; nil if no textindex - suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex - words map[string]*LookupResult // maps words to hit lists - alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings - snippets []*Snippet // all snippets, indexed by snippet index - stats Statistics -} - -func canonical(w string) string { return strings.ToLower(w) } - -// NewIndex creates a new index for the .go files -// in the directories given by dirnames. -// -func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { - var x Indexer - th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time - - // initialize Indexer - // (use some reasonably sized maps to start) - x.fset = token.NewFileSet() - x.packages = make(map[string]*Pak, 256) - x.words = make(map[string]*IndexResult, 8192) - - // index all files in the directories given by dirnames - for dirname := range dirnames { - list, err := fs.ReadDir(dirname) - if err != nil { - continue // ignore this directory - } - for _, f := range list { - if !f.IsDir() { - x.visitFile(dirname, f, fulltextIndex) - } - th.Throttle() - } - } - - if !fulltextIndex { - // the file set, the current file, and the sources are - // not needed after indexing if no text index is built - - // help GC and clear them - x.fset = nil - x.sources.Reset() - x.current = nil // contains reference to fset! - } - - // for each word, reduce the RunLists into a LookupResult; - // also collect the word with its canonical spelling in a - // word list for later computation of alternative spellings - words := make(map[string]*LookupResult) - var wlist RunList - for w, h := range x.words { - decls := reduce(h.Decls) - others := reduce(h.Others) - words[w] = &LookupResult{ - Decls: decls, - Others: others, - } - wlist = append(wlist, &wordPair{canonical(w), w}) - th.Throttle() - } - x.stats.Words = len(words) - - // reduce the word list {canonical(w), w} into - // a list of AltWords runs {canonical(w), {w}} - alist := wlist.reduce(lessWordPair, newAltWords) - - // convert alist into a map of alternative spellings - alts := make(map[string]*AltWords) - for i := 0; i < len(alist); i++ { - a := alist[i].(*AltWords) - alts[a.Canon] = a - } - - // create text index - var suffixes *suffixarray.Index - if fulltextIndex { - suffixes = suffixarray.New(x.sources.Bytes()) - } - - return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats} -} - -type fileIndex struct { - Words map[string]*LookupResult - Alts map[string]*AltWords - Snippets []*Snippet - Fulltext bool -} - -func (x *fileIndex) Write(w io.Writer) error { - return gob.NewEncoder(w).Encode(x) -} - -func (x *fileIndex) Read(r io.Reader) error { - return gob.NewDecoder(r).Decode(x) -} - -// Write writes the index x to w. -func (x *Index) Write(w io.Writer) error { - fulltext := false - if x.suffixes != nil { - fulltext = true - } - fx := fileIndex{ - x.words, - x.alts, - x.snippets, - fulltext, - } - if err := fx.Write(w); err != nil { - return err - } - if fulltext { - encode := func(x interface{}) error { - return gob.NewEncoder(w).Encode(x) - } - if err := x.fset.Write(encode); err != nil { - return err - } - if err := x.suffixes.Write(w); err != nil { - return err - } - } - return nil -} - -// Read reads the index from r into x; x must not be nil. -// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. -func (x *Index) Read(r io.Reader) error { - // We use the ability to read bytes as a plausible surrogate for buffering. - if _, ok := r.(io.ByteReader); !ok { - r = bufio.NewReader(r) - } - var fx fileIndex - if err := fx.Read(r); err != nil { - return err - } - x.words = fx.Words - x.alts = fx.Alts - x.snippets = fx.Snippets - if fx.Fulltext { - x.fset = token.NewFileSet() - decode := func(x interface{}) error { - return gob.NewDecoder(r).Decode(x) - } - if err := x.fset.Read(decode); err != nil { - return err - } - x.suffixes = new(suffixarray.Index) - if err := x.suffixes.Read(r); err != nil { - return err - } - } - return nil -} - -// Stats() returns index statistics. -func (x *Index) Stats() Statistics { - return x.stats -} - -func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { - match = x.words[w] - alt = x.alts[canonical(w)] - // remove current spelling from alternatives - // (if there is no match, the alternatives do - // not contain the current spelling) - if match != nil && alt != nil { - alt = alt.filter(w) - } - return -} - -// isIdentifier reports whether s is a Go identifier. -func isIdentifier(s string) bool { - for i, ch := range s { - if unicode.IsLetter(ch) || ch == ' ' || i > 0 && unicode.IsDigit(ch) { - continue - } - return false - } - return len(s) > 0 -} - -// For a given query, which is either a single identifier or a qualified -// identifier, Lookup returns a list of packages, a LookupResult, and a -// list of alternative spellings, if any. Any and all results may be nil. -// If the query syntax is wrong, an error is reported. -func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) { - ss := strings.Split(query, ".") - - // check query syntax - for _, s := range ss { - if !isIdentifier(s) { - err = errors.New("all query parts must be identifiers") - return - } - } - - // handle simple and qualified identifiers - switch len(ss) { - case 1: - ident := ss[0] - match, alt = x.lookupWord(ident) - if match != nil { - // found a match - filter packages with same name - // for the list of packages called ident, if any - paks = match.Others.filter(ident) - } - - case 2: - pakname, ident := ss[0], ss[1] - match, alt = x.lookupWord(ident) - if match != nil { - // found a match - filter by package name - // (no paks - package names are not qualified) - decls := match.Decls.filter(pakname) - others := match.Others.filter(pakname) - match = &LookupResult{decls, others} - } - - default: - err = errors.New("query is not a (qualified) identifier") - } - - return -} - -func (x *Index) Snippet(i int) *Snippet { - // handle illegal snippet indices gracefully - if 0 <= i && i < len(x.snippets) { - return x.snippets[i] - } - return nil -} - -type positionList []struct { - filename string - line int -} - -func (list positionList) Len() int { return len(list) } -func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } -func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } - -// unique returns the list sorted and with duplicate entries removed -func unique(list []int) []int { - sort.Ints(list) - var last int - i := 0 - for _, x := range list { - if i == 0 || x != last { - last = x - list[i] = x - i++ - } - } - return list[0:i] -} - -// A FileLines value specifies a file and line numbers within that file. -type FileLines struct { - Filename string - Lines []int -} - -// LookupRegexp returns the number of matches and the matches where a regular -// expression r is found in the full text index. At most n matches are -// returned (thus found <= n). -// -func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { - if x.suffixes == nil || n <= 0 { - return - } - // n > 0 - - var list positionList - // FindAllIndex may returns matches that span across file boundaries. - // Such matches are unlikely, buf after eliminating them we may end up - // with fewer than n matches. If we don't have enough at the end, redo - // the search with an increased value n1, but only if FindAllIndex - // returned all the requested matches in the first place (if it - // returned fewer than that there cannot be more). - for n1 := n; found < n; n1 += n - found { - found = 0 - matches := x.suffixes.FindAllIndex(r, n1) - // compute files, exclude matches that span file boundaries, - // and map offsets to file-local offsets - list = make(positionList, len(matches)) - for _, m := range matches { - // by construction, an offset corresponds to the Pos value - // for the file set - use it to get the file and line - p := token.Pos(m[0]) - if file := x.fset.File(p); file != nil { - if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { - // match [m[0], m[1]) is within the file boundaries - list[found].filename = file.Name() - list[found].line = file.Line(p) - found++ - } - } - } - if found == n || len(matches) < n1 { - // found all matches or there's no chance to find more - break - } - } - list = list[0:found] - sort.Sort(list) // sort by filename - - // collect matches belonging to the same file - var last string - var lines []int - addLines := func() { - if len(lines) > 0 { - // remove duplicate lines - result = append(result, FileLines{last, unique(lines)}) - lines = nil - } - } - for _, m := range list { - if m.filename != last { - addLines() - last = m.filename - } - lines = append(lines, m.line) - } - addLines() - - return -} |
