1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
//
//
// SearchableIndex.cs: Index that uses Lucene to search through the docs
//
// Author: Mario Sopena
//
using System;
using System.IO;
using System.Collections.Generic;
// Lucene imports
using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;
namespace Monodoc
{
/// <summary>
/// Wraps a Lucene <see cref="IndexSearcher"/> opened over an on-disk index and
/// exposes term searches over the "hottext", "text" and "examples" fields.
/// </summary>
public class SearchableIndex
{
	// Number of hits returned by Search (string) when the caller gives no count.
	const int maxSearchCount = 30;

	IndexSearcher searcher;
	string dir;

	/// <summary>
	/// Path of the index directory. Reading it while unset assigns and returns
	/// the default "search_index".
	/// </summary>
	public string Dir {
		get {
			if (dir == null)
				dir = "search_index";
			return dir;
		}
		set { dir = value; }
	}

	/// <summary>
	/// Opens the index stored in <paramref name="dir"/>.
	/// </summary>
	/// <param name="dir">Path of the directory containing the index.</param>
	/// <returns>
	/// A ready-to-use instance, or <c>null</c> (after writing a message to the
	/// console) when opening the index raises an <see cref="IOException"/>.
	/// </returns>
	public static SearchableIndex Load (string dir)
	{
		SearchableIndex s = new SearchableIndex ();
		s.dir = dir;
		try {
			// TODO: choose the Directory implementation depending on where we run:
			// the desktop wants a low footprint, a server could use a RAMDirectory.
			s.searcher = new IndexSearcher (FSDirectory.Open (dir));
		} catch (IOException) {
			Console.WriteLine ("Index nonexistent or in bad format");
			return null;
		}
		return s;
	}

	/// <summary>
	/// Searches for <paramref name="term"/> and returns at most
	/// <c>maxSearchCount</c> hits, starting from the first one.
	/// </summary>
	public Result Search (string term)
	{
		return Search (term, maxSearchCount);
	}

	/// <summary>
	/// Searches for <paramref name="term"/> and returns at most
	/// <paramref name="count"/> hits, starting from the first one.
	/// </summary>
	public Result Search (string term, int count)
	{
		return Search (term, count, 0);
	}

	/// <summary>
	/// Searches the "hottext", "text" and "examples" fields for
	/// <paramref name="term"/> and returns one window of the ranked hits.
	/// </summary>
	/// <param name="term">Search term; it is lowercased before querying.</param>
	/// <param name="count">Maximum number of hits to return.</param>
	/// <param name="start">Zero-based rank of the first hit to return.</param>
	/// <returns>
	/// The hits together with the lowercased term, or <c>null</c> (after writing
	/// a message to the console) when the search raises an <see cref="IOException"/>.
	/// </returns>
	/// <exception cref="ArgumentNullException"><paramref name="term"/> is null.</exception>
	public Result Search (string term, int count, int start)
	{
		if (term == null)
			throw new ArgumentNullException ("term");

		try {
			term = NormalizeTerm (term);
			Term htTerm = new Term ("hottext", term);

			// "hottext" is matched three ways; exact and prefix matches are
			// boosted well above the (unboosted) fuzzy match.
			Query fuzzy = new FuzzyQuery (htTerm);
			Query exact = new TermQuery (htTerm);
			exact.Boost = 10f;
			Query prefix = new PrefixQuery (htTerm);
			prefix.Boost = 10f;

			DisjunctionMaxQuery hotText = new DisjunctionMaxQuery (0f);
			hotText.Add (fuzzy);
			hotText.Add (exact);
			hotText.Add (prefix);

			Query text = new TermQuery (new Term ("text", term));
			text.Boost = 3f;
			Query examples = new TermQuery (new Term ("examples", term));
			examples.Boost = 3f;

			DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
			q.Add (hotText);
			q.Add (text);
			q.Add (examples);

			TopDocs top = SearchInternal (q, count, start);
			return new Result (term, searcher, top.ScoreDocs);
		} catch (IOException) {
			Console.WriteLine ("No index in {0}", dir);
			return null;
		}
	}

	// Runs the query and returns the hits ranked [start, start + count).
	TopDocs SearchInternal (Query q, int count, int start)
	{
		// Easy path that doesn't involve creating a Collector ourselves;
		// watch for Lucene.NET improvements on that (like searcher.SearchAfter).
		if (start == 0)
			return searcher.Search (q, count);

		// Collect enough top hits to cover the requested window, then slice it.
		var collector = TopScoreDocCollector.Create (start + count + 1, false);
		searcher.Search (q, collector);
		return collector.TopDocs (start, count);
	}

	/// <summary>
	/// Cheaper search that only looks at the "hottext" field, using an exact
	/// match plus a lower-weighted prefix match.
	/// </summary>
	/// <param name="term">Search term; it is lowercased before querying.</param>
	/// <param name="number">Maximum number of hits to return.</param>
	/// <returns>
	/// The hits together with the lowercased term, or <c>null</c> (after writing
	/// a message to the console) when the search raises an <see cref="IOException"/>.
	/// </returns>
	/// <exception cref="ArgumentNullException"><paramref name="term"/> is null.</exception>
	public Result FastSearch (string term, int number)
	{
		if (term == null)
			throw new ArgumentNullException ("term");

		try {
			term = NormalizeTerm (term);

			Query exact = new TermQuery (new Term ("hottext", term));
			Query prefix = new PrefixQuery (new Term ("hottext", term));
			prefix.Boost = 0.5f;

			DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
			q.Add (exact);
			q.Add (prefix);

			TopDocs top = searcher.Search (q, number);
			return new Result (term, searcher, top.ScoreDocs);
		} catch (IOException) {
			Console.WriteLine ("No index in {0}", dir);
			return null;
		}
	}

	// Lowercases the term independently of the current culture, so the lookup
	// does not change with the user's locale (e.g. Turkish dotless i).
	// NOTE(review): presumably the indexer stores lowercased terms - confirm.
	static string NormalizeTerm (string term)
	{
		return term.ToLowerInvariant ();
	}
}
//
// The outcome of a search: the term that was searched for plus the scored
// hits, with accessors that load the matching documents on demand.
//
public class Result {
	readonly string searchedTerm;
	readonly Searcher source;
	readonly ScoreDoc[] hits;

	/// <summary>
	/// Creates a result for <paramref name="Term"/> from the hits in
	/// <paramref name="docs"/>; documents are fetched through
	/// <paramref name="searcher"/> when they are accessed.
	/// </summary>
	public Result (string Term, Searcher searcher, ScoreDoc[] docs)
	{
		searchedTerm = Term;
		source = searcher;
		hits = docs;
	}

	/// <summary>The term this result was produced for.</summary>
	public string Term {
		get { return searchedTerm; }
	}

	/// <summary>Number of hits held by this result.</summary>
	public int Count {
		get { return hits.Length; }
	}

	/// <summary>Loads the document of the hit at position <paramref name="i"/>.</summary>
	public Document this [int i] {
		get {
			ScoreDoc hit = hits[i];
			return source.Doc (hit.Doc);
		}
	}

	/// <summary>Value of the "title" field of hit <paramref name="i"/>.</summary>
	public string GetTitle (int i)
	{
		return ReadField (i, "title");
	}

	/// <summary>Value of the "url" field of hit <paramref name="i"/>.</summary>
	public string GetUrl (int i)
	{
		return ReadField (i, "url");
	}

	/// <summary>Value of the "fulltitle" field of hit <paramref name="i"/>.</summary>
	public string GetFullTitle (int i)
	{
		return ReadField (i, "fulltitle");
	}

	/// <summary>Score of the hit at position <paramref name="i"/>.</summary>
	public float Score (int i)
	{
		return hits[i].Score;
	}

	// Fetches hit i and returns what Document.Get yields for the named field,
	// or the empty string when no document came back.
	string ReadField (int i, string fieldName)
	{
		Document document = this[i];
		if (document == null)
			return string.Empty;
		return document.Get (fieldName);
	}
}
}
|