author    | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200
committer | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200
commit    | 5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
tree      | c0650497e988f47be9c6f2324fa692a52dea82e1 /src/cmd/godoc/index.go
parent    | 80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)
download  | golang-upstream/60.tar.gz
Imported Upstream version 60 (tag: upstream/60)
Diffstat (limited to 'src/cmd/godoc/index.go')
-rw-r--r-- | src/cmd/godoc/index.go | 986
1 file changed, 986 insertions, 0 deletions
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go
new file mode 100644
index 000000000..9b4f31514
--- /dev/null
+++ b/src/cmd/godoc/index.go
@@ -0,0 +1,986 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains the infrastructure to create an
+// identifier and full-text index for a set of Go files.
+//
+// Algorithm for identifier index:
+// - traverse all .go files of the file tree specified by root
+// - for each word (identifier) encountered, collect all occurrences (spots)
+//   into a list; this produces a list of spots for each word
+// - reduce the lists: from a list of spots to a list of FileRuns,
+//   and from a list of FileRuns into a list of PakRuns
+// - make a HitList from the PakRuns
+//
+// Details:
+// - keep two lists per word: one containing package-level declarations
+//   that have snippets, and one containing all other spots
+// - keep the snippets in a separate table indexed by snippet index
+//   and store the snippet index in place of the line number in a SpotInfo
+//   (the line number for spots with snippets is stored in the snippet)
+// - at the end, create lists of alternative spellings for a given
+//   word
+//
+// Algorithm for full text index:
+// - concatenate all source code in a byte buffer (in memory)
+// - add the files to a file set in lockstep as they are added to the byte
+//   buffer such that a byte buffer offset corresponds to the Pos value for
+//   that file location
+// - create a suffix array from the concatenated sources
+//
+// String lookup in full text index:
+// - use the suffix array to lookup a string's offsets - the offsets
+//   correspond to the Pos values relative to the file set
+// - translate the Pos values back into file and line information and
+//   sort the result
+
+package main
+
+import (
+    "bytes"
+    "container/vector"
+    "go/ast"
+    "go/parser"
+    "go/token"
+    "go/scanner"
+    "index/suffixarray"
+    "os"
+    "path/filepath"
+    "regexp"
+    "sort"
+    "strings"
+)
+
+// ----------------------------------------------------------------------------
+// RunList
+
+// A RunList is a vector of entries that can be sorted according to some
+// criteria. A RunList may be compressed by grouping "runs" of entries
+// which are equal (according to the sort criteria) into a new RunList of
+// runs. For instance, a RunList containing pairs (x, y) may be compressed
+// into a RunList containing pair runs (x, {y}) where each run consists of
+// a list of y's with the same x.
+type RunList struct {
+    vector.Vector
+    less func(x, y interface{}) bool
+}
+
+func (h *RunList) Less(i, j int) bool { return h.less(h.At(i), h.At(j)) }
+
+func (h *RunList) sort(less func(x, y interface{}) bool) {
+    h.less = less
+    sort.Sort(h)
+}
+
+// Compress entries which are the same according to a sort criterion
+// (specified by less) into "runs".
+func (h *RunList) reduce(less func(x, y interface{}) bool, newRun func(h *RunList, i, j int) interface{}) *RunList {
+    // create runs of entries with equal values
+    h.sort(less)
+
+    // for each run, make a new run object and collect them in a new RunList
+    var hh RunList
+    i := 0
+    for j := 0; j < h.Len(); j++ {
+        if less(h.At(i), h.At(j)) {
+            hh.Push(newRun(h, i, j))
+            i = j // start a new run
+        }
+    }
+    // add final run, if any
+    if i < h.Len() {
+        hh.Push(newRun(h, i, h.Len()))
+    }
+
+    return &hh
+}
+
+// ----------------------------------------------------------------------------
+// SpotInfo
+
+// A SpotInfo value describes a particular identifier spot in a given file;
+// it encodes three values: the SpotKind (declaration or use), a line or
+// snippet index "lori", and whether it's a line or index.
+//
+// The following encoding is used:
+//
+//   bits    32   4    1       0
+//   value    [lori|kind|isIndex]
+//
+type SpotInfo uint32
+
+// SpotKind describes whether an identifier is declared (and what kind of
+// declaration) or used.
+type SpotKind uint32
+
+const (
+    PackageClause SpotKind = iota
+    ImportDecl
+    ConstDecl
+    TypeDecl
+    VarDecl
+    FuncDecl
+    MethodDecl
+    Use
+    nKinds
+)
+
+func init() {
+    // sanity check: if nKinds is too large, the SpotInfo
+    // accessor functions may need to be updated
+    if nKinds > 8 {
+        panic("nKinds > 8")
+    }
+}
+
+// makeSpotInfo makes a SpotInfo.
+func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
+    // encode lori: bits [4..32)
+    x := SpotInfo(lori) << 4
+    if int(x>>4) != lori {
+        // lori value doesn't fit - since snippet indices are
+        // most certainly always smaller than 1<<28, this can
+        // only happen for line numbers; give it no line number (= 0)
+        x = 0
+    }
+    // encode kind: bits [1..4)
+    x |= SpotInfo(kind) << 1
+    // encode isIndex: bit 0
+    if isIndex {
+        x |= 1
+    }
+    return x
+}
+
+func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) }
+func (x SpotInfo) Lori() int      { return int(x >> 4) }
+func (x SpotInfo) IsIndex() bool  { return x&1 != 0 }
+
+// ----------------------------------------------------------------------------
+// KindRun
+
+// Debugging support. Disable to see multiple entries per line.
+const removeDuplicates = true
+
+// A KindRun is a run of SpotInfos of the same kind in a given file.
+type KindRun struct {
+    Kind  SpotKind
+    Infos []SpotInfo
+}
+
+// KindRuns are sorted by line number or index. Since the isIndex bit
+// is always the same for all infos in one list we can compare lori's.
+func (f *KindRun) Len() int           { return len(f.Infos) }
+func (f *KindRun) Less(i, j int) bool { return f.Infos[i].Lori() < f.Infos[j].Lori() }
+func (f *KindRun) Swap(i, j int)      { f.Infos[i], f.Infos[j] = f.Infos[j], f.Infos[i] }
+
+// FileRun contents are sorted by Kind for the reduction into KindRuns.
+func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
+
+// newKindRun allocates a new KindRun from the SpotInfo run [i, j) in h.
+func newKindRun(h *RunList, i, j int) interface{} {
+    kind := h.At(i).(SpotInfo).Kind()
+    infos := make([]SpotInfo, j-i)
+    k := 0
+    for ; i < j; i++ {
+        infos[k] = h.At(i).(SpotInfo)
+        k++
+    }
+    run := &KindRun{kind, infos}
+
+    // Spots were sorted by file and kind to create this run.
+    // Within this run, sort them by line number or index.
+    sort.Sort(run)
+
+    if removeDuplicates {
+        // Since both the lori and kind field must be the
+        // same for duplicates, and since the isIndex bit
+        // is always the same for all infos in one list,
+        // we can simply compare the entire info.
+        k := 0
+        var prev SpotInfo
+        for i, x := range infos {
+            if x != prev || i == 0 {
+                infos[k] = x
+                k++
+                prev = x
+            }
+        }
+        run.Infos = infos[0:k]
+    }
+
+    return run
+}
+
+// ----------------------------------------------------------------------------
+// FileRun
+
+// A Pak describes a Go package.
+type Pak struct {
+    Path string // path of directory containing the package
+    Name string // package name as declared by package clause
+}
+
+// Paks are sorted by name (primary key) and by import path (secondary key).
+func (p *Pak) less(q *Pak) bool {
+    return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
+}
+
+// A File describes a Go file.
+type File struct {
+    Path string // complete file name
+    Pak  Pak    // the package to which the file belongs
+}
+
+// A Spot describes a single occurrence of a word.
+type Spot struct {
+    File *File
+    Info SpotInfo
+}
+
+// A FileRun is a list of KindRuns belonging to the same file.
+type FileRun struct {
+    File   *File
+    Groups []*KindRun
+}
+
+// Spots are sorted by path for the reduction into FileRuns.
+func lessSpot(x, y interface{}) bool { return x.(Spot).File.Path < y.(Spot).File.Path }
+
+// newFileRun allocates a new FileRun from the Spot run [i, j) in h.
+func newFileRun(h0 *RunList, i, j int) interface{} {
+    file := h0.At(i).(Spot).File
+
+    // reduce the list of Spots into a list of KindRuns
+    var h1 RunList
+    h1.Vector.Resize(j-i, 0)
+    k := 0
+    for ; i < j; i++ {
+        h1.Set(k, h0.At(i).(Spot).Info)
+        k++
+    }
+    h2 := h1.reduce(lessKind, newKindRun)
+
+    // create the FileRun
+    groups := make([]*KindRun, h2.Len())
+    for i := 0; i < h2.Len(); i++ {
+        groups[i] = h2.At(i).(*KindRun)
+    }
+    return &FileRun{file, groups}
+}
+
+// ----------------------------------------------------------------------------
+// PakRun
+
+// A PakRun describes a run of *FileRuns of a package.
+type PakRun struct {
+    Pak   Pak
+    Files []*FileRun
+}
+
+// Sorting support for files within a PakRun.
+func (p *PakRun) Len() int           { return len(p.Files) }
+func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Path < p.Files[j].File.Path }
+func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
+
+// FileRuns are sorted by package for the reduction into PakRuns.
+func lessFileRun(x, y interface{}) bool {
+    return x.(*FileRun).File.Pak.less(&y.(*FileRun).File.Pak)
+}
+
+// newPakRun allocates a new PakRun from the *FileRun run [i, j) in h.
+func newPakRun(h *RunList, i, j int) interface{} {
+    pak := h.At(i).(*FileRun).File.Pak
+    files := make([]*FileRun, j-i)
+    k := 0
+    for ; i < j; i++ {
+        files[k] = h.At(i).(*FileRun)
+        k++
+    }
+    run := &PakRun{pak, files}
+    sort.Sort(run) // files were sorted by package; sort them by file now
+    return run
+}
+
+// ----------------------------------------------------------------------------
+// HitList
+
+// A HitList describes a list of PakRuns.
+type HitList []*PakRun
+
+// PakRuns are sorted by package.
+func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(&y.(*PakRun).Pak) }
+
+func reduce(h0 *RunList) HitList {
+    // reduce a list of Spots into a list of FileRuns
+    h1 := h0.reduce(lessSpot, newFileRun)
+    // reduce a list of FileRuns into a list of PakRuns
+    h2 := h1.reduce(lessFileRun, newPakRun)
+    // sort the list of PakRuns by package
+    h2.sort(lessPakRun)
+    // create a HitList
+    h := make(HitList, h2.Len())
+    for i := 0; i < h2.Len(); i++ {
+        h[i] = h2.At(i).(*PakRun)
+    }
+    return h
+}
+
+func (h HitList) filter(pakname string) HitList {
+    // determine number of matching packages (most of the time just one)
+    n := 0
+    for _, p := range h {
+        if p.Pak.Name == pakname {
+            n++
+        }
+    }
+    // create filtered HitList
+    hh := make(HitList, n)
+    i := 0
+    for _, p := range h {
+        if p.Pak.Name == pakname {
+            hh[i] = p
+            i++
+        }
+    }
+    return hh
+}
+
+// ----------------------------------------------------------------------------
+// AltWords
+
+type wordPair struct {
+    canon string // canonical word spelling (all lowercase)
+    alt   string // alternative spelling
+}
+
+// An AltWords describes a list of alternative spellings for a
+// canonical (all lowercase) spelling of a word.
+type AltWords struct {
+    Canon string   // canonical word spelling (all lowercase)
+    Alts  []string // alternative spellings for the same word
+}
+
+// wordPairs are sorted by their canonical spelling.
+func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
+
+// newAltWords allocates a new AltWords from the *wordPair run [i, j) in h.
+func newAltWords(h *RunList, i, j int) interface{} {
+    canon := h.At(i).(*wordPair).canon
+    alts := make([]string, j-i)
+    k := 0
+    for ; i < j; i++ {
+        alts[k] = h.At(i).(*wordPair).alt
+        k++
+    }
+    return &AltWords{canon, alts}
+}
+
+func (a *AltWords) filter(s string) *AltWords {
+    if len(a.Alts) == 1 && a.Alts[0] == s {
+        // there are no different alternatives
+        return nil
+    }
+
+    // make a new AltWords with the current spelling removed
+    alts := make([]string, len(a.Alts))
+    i := 0
+    for _, w := range a.Alts {
+        if w != s {
+            alts[i] = w
+            i++
+        }
+    }
+    return &AltWords{a.Canon, alts[0:i]}
+}
+
+// ----------------------------------------------------------------------------
+// Indexer
+
+// Adjust these flags as seems best.
+const includeMainPackages = true
+const includeTestFiles = true
+
+type IndexResult struct {
+    Decls  RunList // package-level declarations (with snippets)
+    Others RunList // all other occurrences
+}
+
+// Statistics provides statistics information for an index.
+type Statistics struct {
+    Bytes int // total size of indexed source files
+    Files int // number of indexed source files
+    Lines int // number of lines (all files)
+    Words int // number of different identifiers
+    Spots int // number of identifier occurrences
+}
+
+// An Indexer maintains the data structures and provides the machinery
+// for indexing .go files under a file tree. It implements the path.Visitor
+// interface for walking file trees, and the ast.Visitor interface for
+// walking Go ASTs.
+type Indexer struct {
+    fset     *token.FileSet          // file set for all indexed files
+    sources  bytes.Buffer            // concatenated sources
+    words    map[string]*IndexResult // RunLists of Spots
+    snippets vector.Vector           // vector of *Snippets, indexed by snippet indices
+    current  *token.File             // last file added to file set
+    file     *File                   // the current file
+    decl     ast.Decl                // AST for current decl
+    stats    Statistics
+}
+
+func (x *Indexer) addSnippet(s *Snippet) int {
+    index := x.snippets.Len()
+    x.snippets.Push(s)
+    return index
+}
+
+func (x *Indexer) visitComment(c *ast.CommentGroup) {
+    if c != nil {
+        ast.Walk(x, c)
+    }
+}
+
+func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
+    if id != nil {
+        lists, found := x.words[id.Name]
+        if !found {
+            lists = new(IndexResult)
+            x.words[id.Name] = lists
+        }
+
+        if kind == Use || x.decl == nil {
+            // not a declaration or no snippet required
+            info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
+            lists.Others.Push(Spot{x.file, info})
+        } else {
+            // a declaration with snippet
+            index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
+            info := makeSpotInfo(kind, index, true)
+            lists.Decls.Push(Spot{x.file, info})
+        }
+
+        x.stats.Spots++
+    }
+}
+
+func (x *Indexer) visitSpec(spec ast.Spec, isVarDecl bool) {
+    switch n := spec.(type) {
+    case *ast.ImportSpec:
+        x.visitComment(n.Doc)
+        x.visitIdent(ImportDecl, n.Name)
+        ast.Walk(x, n.Path)
+        x.visitComment(n.Comment)
+
+    case *ast.ValueSpec:
+        x.visitComment(n.Doc)
+        kind := ConstDecl
+        if isVarDecl {
+            kind = VarDecl
+        }
+        for _, n := range n.Names {
+            x.visitIdent(kind, n)
+        }
+        ast.Walk(x, n.Type)
+        for _, v := range n.Values {
+            ast.Walk(x, v)
+        }
+        x.visitComment(n.Comment)
+
+    case *ast.TypeSpec:
+        x.visitComment(n.Doc)
+        x.visitIdent(TypeDecl, n.Name)
+        ast.Walk(x, n.Type)
+        x.visitComment(n.Comment)
+    }
+}
+
+func (x *Indexer) Visit(node ast.Node) ast.Visitor {
+    // TODO(gri): methods in interface types are categorized as VarDecl
+    switch n := node.(type) {
+    case nil:
+        return nil
+
+    case *ast.Ident:
+        x.visitIdent(Use, n)
+
+    case *ast.Field:
+        x.decl = nil // no snippets for fields
+        x.visitComment(n.Doc)
+        for _, m := range n.Names {
+            x.visitIdent(VarDecl, m)
+        }
+        ast.Walk(x, n.Type)
+        ast.Walk(x, n.Tag)
+        x.visitComment(n.Comment)
+
+    case *ast.DeclStmt:
+        if decl, ok := n.Decl.(*ast.GenDecl); ok {
+            // local declarations can only be *ast.GenDecls
+            x.decl = nil // no snippets for local declarations
+            x.visitComment(decl.Doc)
+            for _, s := range decl.Specs {
+                x.visitSpec(s, decl.Tok == token.VAR)
+            }
+        } else {
+            // handle error case gracefully
+            ast.Walk(x, n.Decl)
+        }
+
+    case *ast.GenDecl:
+        x.decl = n
+        x.visitComment(n.Doc)
+        for _, s := range n.Specs {
+            x.visitSpec(s, n.Tok == token.VAR)
+        }
+
+    case *ast.FuncDecl:
+        x.visitComment(n.Doc)
+        kind := FuncDecl
+        if n.Recv != nil {
+            kind = MethodDecl
+            ast.Walk(x, n.Recv)
+        }
+        x.decl = n
+        x.visitIdent(kind, n.Name)
+        ast.Walk(x, n.Type)
+        if n.Body != nil {
+            ast.Walk(x, n.Body)
+        }
+
+    case *ast.File:
+        x.visitComment(n.Doc)
+        x.decl = nil
+        x.visitIdent(PackageClause, n.Name)
+        for _, d := range n.Decls {
+            ast.Walk(x, d)
+        }
+        // don't visit package level comments for now
+        // to avoid duplicate visiting from individual
+        // nodes
+
+    default:
+        return x
+    }
+
+    return nil
+}
+
+func pkgName(filename string) string {
+    // use a new file set each time in order to not pollute the indexer's
+    // file set (which must stay in sync with the concatenated source code)
+    file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly)
+    if err != nil || file == nil {
+        return ""
+    }
+    return file.Name.Name
+}
+
+// addFile adds a file to the index if possible and returns the file set file
+// and the file's AST if it was successfully parsed as a Go file. If addFile
+// failed (that is, if the file was not added), it returns file == nil.
+func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
+    // open file
+    f, err := fs.Open(filename)
+    if err != nil {
+        return
+    }
+    defer f.Close()
+
+    // The file set's base offset and x.sources size must be in lock-step;
+    // this permits the direct mapping of suffix array lookup results to
+    // corresponding Pos values.
+    //
+    // When a file is added to the file set, its offset base increases by
+    // the size of the file + 1; and the initial base offset is 1. Add an
+    // extra byte to the sources here.
+    x.sources.WriteByte(0)
+
+    // If the sources length doesn't match the file set base at this point,
+    // the file set implementation changed or we have another error.
+    base := x.fset.Base()
+    if x.sources.Len() != base {
+        panic("internal error - file base incorrect")
+    }
+
+    // append file contents (src) to x.sources
+    if _, err := x.sources.ReadFrom(f); err == nil {
+        src := x.sources.Bytes()[base:]
+
+        if goFile {
+            // parse the file and in the process add it to the file set
+            if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
+                file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
+                return
+            }
+            // file has parse errors, and the AST may be incorrect -
+            // set lines information explicitly and index as ordinary
+            // text file (cannot fall through to the text case below
+            // because the file has already been added to the file set
+            // by the parser)
+            file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
+            file.SetLinesForContent(src)
+            ast = nil
+            return
+        }
+
+        if isText(src) {
+            // only add the file to the file set (for the full text index)
+            file = x.fset.AddFile(filename, x.fset.Base(), len(src))
+            file.SetLinesForContent(src)
+            return
+        }
+    }
+
+    // discard possibly added data
+    x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
+    return
+}
+
+// Design note: Using an explicit white list of permitted files for indexing
+// makes sure that the important files are included and massively reduces the
+// number of files to index. The advantage over a blacklist is that unexpected
+// (non-blacklisted) files won't suddenly explode the index.
+//
+// TODO(gri): We may want to make this list customizable, perhaps via a flag.
+
+// Files are whitelisted if they have a file name or extension
+// present as key in whitelisted.
+var whitelisted = map[string]bool{
+    ".bash":        true,
+    ".c":           true,
+    ".css":         true,
+    ".go":          true,
+    ".goc":         true,
+    ".h":           true,
+    ".html":        true,
+    ".js":          true,
+    ".out":         true,
+    ".py":          true,
+    ".s":           true,
+    ".sh":          true,
+    ".txt":         true,
+    ".xml":         true,
+    "AUTHORS":      true,
+    "CONTRIBUTORS": true,
+    "LICENSE":      true,
+    "Makefile":     true,
+    "PATENTS":      true,
+    "README":       true,
+}
+
+// isWhitelisted returns true if a file is on the list
+// of "permitted" files for indexing. The filename must
+// be the directory-local name of the file.
+func isWhitelisted(filename string) bool {
+    key := filepath.Ext(filename)
+    if key == "" {
+        // file has no extension - use entire filename
+        key = filename
+    }
+    return whitelisted[key]
+}
+
+func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) {
+    if !f.IsRegular() {
+        return
+    }
+
+    filename := filepath.Join(dirname, f.Name())
+    goFile := false
+
+    switch {
+    case isGoFile(f):
+        if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
+            return
+        }
+        if !includeMainPackages && pkgName(filename) == "main" {
+            return
+        }
+        goFile = true
+
+    case !fulltextIndex || !isWhitelisted(f.Name()):
+        return
+    }
+
+    file, fast := x.addFile(filename, goFile)
+    if file == nil {
+        return // addFile failed
+    }
+
+    if fast != nil {
+        // we've got a Go file to index
+        x.current = file
+        dir, _ := filepath.Split(filename)
+        pak := Pak{dir, fast.Name.Name}
+        x.file = &File{filename, pak}
+        ast.Walk(x, fast)
+    }
+
+    // update statistics
+    x.stats.Bytes += file.Size()
+    x.stats.Files++
+    x.stats.Lines += file.LineCount()
+}
+
+// ----------------------------------------------------------------------------
+// Index
+
+type LookupResult struct {
+    Decls  HitList // package-level declarations (with snippets)
+    Others HitList // all other occurrences
+}
+
+type Index struct {
+    fset     *token.FileSet           // file set used during indexing; nil if no textindex
+    suffixes *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
+    words    map[string]*LookupResult // maps words to hit lists
+    alts     map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
+    snippets []*Snippet               // all snippets, indexed by snippet index
+    stats    Statistics
+}
+
+func canonical(w string) string { return strings.ToLower(w) }
+
+// NewIndex creates a new index for the .go files
+// in the directories given by dirnames.
+//
+func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
+    var x Indexer
+
+    // initialize Indexer
+    x.fset = token.NewFileSet()
+    x.words = make(map[string]*IndexResult)
+
+    // index all files in the directories given by dirnames
+    for dirname := range dirnames {
+        list, err := fs.ReadDir(dirname)
+        if err != nil {
+            continue // ignore this directory
+        }
+        for _, f := range list {
+            if !f.IsDirectory() {
+                x.visitFile(dirname, f, fulltextIndex)
+            }
+        }
+    }
+
+    if !fulltextIndex {
+        // the file set, the current file, and the sources are
+        // not needed after indexing if no text index is built -
+        // help GC and clear them
+        x.fset = nil
+        x.sources.Reset()
+        x.current = nil // contains reference to fset!
+    }
+
+    // for each word, reduce the RunLists into a LookupResult;
+    // also collect the word with its canonical spelling in a
+    // word list for later computation of alternative spellings
+    words := make(map[string]*LookupResult)
+    var wlist RunList
+    for w, h := range x.words {
+        decls := reduce(&h.Decls)
+        others := reduce(&h.Others)
+        words[w] = &LookupResult{
+            Decls:  decls,
+            Others: others,
+        }
+        wlist.Push(&wordPair{canonical(w), w})
+    }
+    x.stats.Words = len(words)
+
+    // reduce the word list {canonical(w), w} into
+    // a list of AltWords runs {canonical(w), {w}}
+    alist := wlist.reduce(lessWordPair, newAltWords)
+
+    // convert alist into a map of alternative spellings
+    alts := make(map[string]*AltWords)
+    for i := 0; i < alist.Len(); i++ {
+        a := alist.At(i).(*AltWords)
+        alts[a.Canon] = a
+    }
+
+    // convert snippet vector into a list
+    snippets := make([]*Snippet, x.snippets.Len())
+    for i := 0; i < x.snippets.Len(); i++ {
+        snippets[i] = x.snippets.At(i).(*Snippet)
+    }
+
+    // create text index
+    var suffixes *suffixarray.Index
+    if fulltextIndex {
+        suffixes = suffixarray.New(x.sources.Bytes())
+    }
+
+    return &Index{x.fset, suffixes, words, alts, snippets, x.stats}
+}
+
+// Stats returns index statistics.
+func (x *Index) Stats() Statistics {
+    return x.stats
+}
+
+func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) {
+    match = x.words[w]
+    alt = x.alts[canonical(w)]
+    // remove current spelling from alternatives
+    // (if there is no match, the alternatives do
+    // not contain the current spelling)
+    if match != nil && alt != nil {
+        alt = alt.filter(w)
+    }
+    return
+}
+
+func isIdentifier(s string) bool {
+    var S scanner.Scanner
+    fset := token.NewFileSet()
+    S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0)
+    if _, tok, _ := S.Scan(); tok == token.IDENT {
+        _, tok, _ := S.Scan()
+        return tok == token.EOF
+    }
+    return false
+}
+
+// For a given query, which is either a single identifier or a qualified
+// identifier, Lookup returns a LookupResult, and a list of alternative
+// spellings, if any. If the query syntax is wrong, an error is reported.
+func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, err os.Error) {
+    ss := strings.Split(query, ".")
+
+    // check query syntax
+    for _, s := range ss {
+        if !isIdentifier(s) {
+            err = os.NewError("all query parts must be identifiers")
+            return
+        }
+    }
+
+    switch len(ss) {
+    case 1:
+        match, alt = x.LookupWord(ss[0])
+
+    case 2:
+        pakname := ss[0]
+        match, alt = x.LookupWord(ss[1])
+        if match != nil {
+            // found a match - filter by package name
+            decls := match.Decls.filter(pakname)
+            others := match.Others.filter(pakname)
+            match = &LookupResult{decls, others}
+        }
+
+    default:
+        err = os.NewError("query is not a (qualified) identifier")
+    }
+
+    return
+}
+
+func (x *Index) Snippet(i int) *Snippet {
+    // handle illegal snippet indices gracefully
+    if 0 <= i && i < len(x.snippets) {
+        return x.snippets[i]
+    }
+    return nil
+}
+
+type positionList []struct {
+    filename string
+    line     int
+}
+
+func (list positionList) Len() int           { return len(list) }
+func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
+func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }
+
+// unique returns the list sorted and with duplicate entries removed
+func unique(list []int) []int {
+    sort.Ints(list)
+    var last int
+    i := 0
+    for _, x := range list {
+        if i == 0 || x != last {
+            last = x
+            list[i] = x
+            i++
+        }
+    }
+    return list[0:i]
+}
+
+// A FileLines value specifies a file and line numbers within that file.
+type FileLines struct {
+    Filename string
+    Lines    []int
+}
+
+// LookupRegexp returns the number of matches and the matches where a regular
+// expression r is found in the full text index. At most n matches are
+// returned (thus found <= n).
+//
+func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
+    if x.suffixes == nil || n <= 0 {
+        return
+    }
+    // n > 0
+
+    var list positionList
+    // FindAllIndex may return matches that span across file boundaries.
+    // Such matches are unlikely, but after eliminating them we may end up
+    // with fewer than n matches. If we don't have enough at the end, redo
+    // the search with an increased value n1, but only if FindAllIndex
+    // returned all the requested matches in the first place (if it
+    // returned fewer than that there cannot be more).
+    for n1 := n; found < n; n1 += n - found {
+        found = 0
+        matches := x.suffixes.FindAllIndex(r, n1)
+        // compute files, exclude matches that span file boundaries,
+        // and map offsets to file-local offsets
+        list = make(positionList, len(matches))
+        for _, m := range matches {
+            // by construction, an offset corresponds to the Pos value
+            // for the file set - use it to get the file and line
+            p := token.Pos(m[0])
+            if file := x.fset.File(p); file != nil {
+                if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
+                    // match [m[0], m[1]) is within the file boundaries
+                    list[found].filename = file.Name()
+                    list[found].line = file.Line(p)
+                    found++
+                }
+            }
+        }
+        if found == n || len(matches) < n1 {
+            // found all matches or there's no chance to find more
+            break
+        }
+    }
+    list = list[0:found]
+    sort.Sort(list) // sort by filename
+
+    // collect matches belonging to the same file
+    var last string
+    var lines []int
+    addLines := func() {
+        if len(lines) > 0 {
+            // remove duplicate lines
+            result = append(result, FileLines{last, unique(lines)})
+            lines = nil
+        }
+    }
+    for _, m := range list {
+        if m.filename != last {
+            addLines()
+            last = m.filename
+        }
+        lines = append(lines, m.line)
+    }
+    addLines()
+
+    return
+}
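
The SpotInfo packing used above (bit 0: isIndex, bits 1-3: kind, bits 4-31: lori) is easy to sanity-check in isolation. The following standalone sketch mirrors that encoding in modern Go; the names match the file, but the main driver and its sample values are illustrative only, not part of the commit.

package main

import "fmt"

// SpotKind mirrors the eight declaration/use kinds from the index;
// they must fit in 3 bits (see the nKinds > 8 check above).
type SpotKind uint32

const (
    PackageClause SpotKind = iota
    ImportDecl
    ConstDecl
    TypeDecl
    VarDecl
    FuncDecl
    MethodDecl
    Use
)

// SpotInfo packs (kind, lori, isIndex) into one uint32.
type SpotInfo uint32

func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
    x := SpotInfo(lori) << 4 // lori: bits [4..32)
    if int(x>>4) != lori {
        x = 0 // lori too large for 28 bits: drop the line number
    }
    x |= SpotInfo(kind) << 1 // kind: bits [1..4)
    if isIndex {
        x |= 1 // isIndex: bit 0
    }
    return x
}

func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) }
func (x SpotInfo) Lori() int      { return int(x >> 4) }
func (x SpotInfo) IsIndex() bool  { return x&1 != 0 }

func main() {
    // round-trip a sample value: a FuncDecl on line 42
    info := makeSpotInfo(FuncDecl, 42, false)
    fmt.Println(info.Kind() == FuncDecl, info.Lori() == 42, !info.IsIndex())
    // Output: true true true
}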
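The full-text side rests on one invariant: sources are concatenated into a single buffer while each file's base offset is recorded in lockstep, so a suffix-array match offset maps directly back to a (file, local offset) pair. A minimal sketch of that idea with today's index/suffixarray API follows; the file names and contents are made-up inputs, and the hand-rolled span table stands in for the token.FileSet the real code uses.

package main

import (
    "bytes"
    "fmt"
    "index/suffixarray"
    "regexp"
    "sort"
)

func main() {
    // two fake "files" to index (illustrative data)
    files := map[string]string{
        "a.go": "package a\nfunc Hello() {}\n",
        "b.go": "package b\nfunc HelloWorld() {}\n",
    }

    // concatenate sources, remembering each file's base offset
    type span struct {
        name string
        base int
    }
    var buf bytes.Buffer
    var spans []span
    for _, name := range []string{"a.go", "b.go"} {
        spans = append(spans, span{name, buf.Len()})
        buf.WriteString(files[name])
    }

    // one suffix array over the whole concatenation
    index := suffixarray.New(buf.Bytes())

    // regexp lookup; map each match offset back to its file
    for _, m := range index.FindAllIndex(regexp.MustCompile(`Hello\w*`), -1) {
        // first span whose base exceeds the match start, minus one,
        // is the containing file
        i := sort.Search(len(spans), func(i int) bool { return spans[i].base > m[0] }) - 1
        fmt.Printf("%s @ %d: %s\n", spans[i].name, m[0]-spans[i].base, buf.Bytes()[m[0]:m[1]])
    }
}

The real indexer additionally rejects matches that straddle a file boundary and retries with a larger match budget; the check against file.Base() and file.Size() in LookupRegexp above is exactly that guard.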