1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| |
6 |
| |
7 |
| |
8 |
| |
9 |
| |
10 |
| |
11 |
| |
12 |
| |
13 |
| |
14 |
| |
15 |
| |
16 |
| |
17 |
| |
18 |
| |
19 |
| |
20 |
| package org.apache.xindice.core.indexer; |
21 |
| |
22 |
| import org.apache.commons.logging.Log; |
23 |
| import org.apache.commons.logging.LogFactory; |
24 |
| import org.apache.lucene.analysis.Analyzer; |
25 |
| import org.apache.lucene.document.Document; |
26 |
| import org.apache.lucene.document.Field; |
27 |
| import org.apache.lucene.index.IndexReader; |
28 |
| import org.apache.lucene.index.IndexWriter; |
29 |
| import org.apache.lucene.index.Term; |
30 |
| import org.apache.lucene.queryParser.ParseException; |
31 |
| import org.apache.lucene.queryParser.QueryParser; |
32 |
| import org.apache.lucene.search.*; |
33 |
| import org.apache.lucene.store.SimpleFSDirectory; |
34 |
| import org.apache.xindice.core.Collection; |
35 |
| import org.apache.xindice.core.DBException; |
36 |
| import org.apache.xindice.core.DBObject; |
37 |
| import org.apache.xindice.core.FaultCodes; |
38 |
| import org.apache.xindice.core.data.Key; |
39 |
| import org.apache.xindice.core.query.CompilationException; |
40 |
| import org.apache.xindice.core.query.ProcessingException; |
41 |
| import org.apache.xindice.util.Configuration; |
42 |
| import org.apache.xindice.util.StringUtilities; |
43 |
| import org.apache.xindice.util.XindiceException; |
44 |
| |
45 |
| import java.io.File; |
46 |
| import java.io.IOException; |
47 |
| import java.util.HashMap; |
48 |
| import java.util.Iterator; |
49 |
| |
50 |
| |
51 |
| |
52 |
| |
53 |
| |
54 |
| |
55 |
| |
56 |
| |
57 |
| |
58 |
| |
59 |
| |
60 |
| |
61 |
| |
62 |
| |
63 |
| |
64 |
| |
65 |
| |
66 |
| |
67 |
| |
68 |
| |
69 |
| |
70 |
| |
71 |
| |
72 |
| |
73 |
| |
74 |
| |
75 |
| |
76 |
| |
77 |
| |
78 |
| public final class LuceneIndexer implements Indexer, DBObject { |
79 |
| |
80 |
// Commons-logging logger for this class.
| private static final Log log = LogFactory.getLog(LuceneIndexer.class); |
81 |
| |
82 |
// Configuration attribute holding the index name (read in setConfig).
| private static final String NAME = "name"; |
83 |
// Name of the configuration child elements that declare indexed patterns.
| private static final String PATTERN = "pattern"; |
84 |
// Name of the optional configuration child element that selects the default field.
| private static final String DEFAULT = "default"; |
85 |
// Configuration attribute holding the analyzer class name.
| private static final String ANALYZER = "analyzer"; |
86 |
// Attribute of a pattern element holding the pattern string.
| private static final String PATTERN_STRING = "pattern"; |
87 |
// Attribute of a pattern (or default) element holding the field alias.
| private static final String PATTERN_ALIAS = "alias"; |
88 |
| |
89 |
// Name of the Lucene field in which each indexed document stores its key.
| public static final String KEYNAME = "key"; |
90 |
| |
91 |
| |
92 |
// Analyzer class instantiated when the configuration names none.
| public static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer"; |
93 |
| |
94 |
// Directory holding the index; assigned by setConfig via setFile.
| private SimpleFSDirectory idxFile; |
95 |
// Index writer; non-null exactly while the index counts as opened (see isOpened).
| private IndexWriter iw; |
96 |
// Analyzer instance created in setConfig and used for both writing and query parsing.
| private Analyzer an; |
97 |
| |
98 |
| |
99 |
| |
100 |
| |
101 |
| |
102 |
| |
103 |
| |
104 |
| |
105 |
| |
106 |
// Cached searcher handed out by getSearcher; replaced there once it is no longer current.
| private Searcher searcher; |
107 |
| |
108 |
// Configuration passed to setConfig, and the collection passed to setCollection.
| private Configuration config; |
109 |
| private Collection collection; |
110 |
| |
111 |
// Index name taken from the configuration.
| private String name; |
112 |
// Maps each configured IndexPattern to its alias (the Lucene field name).
| private HashMap patterns = new HashMap(); |
113 |
| |
114 |
| |
115 |
// Counters of additions/deletions since the last optimize; read and written under 'lock'.
| private int docsAdded; |
116 |
| private int docsDeleted; |
117 |
| |
118 |
// Monitor guarding docsAdded and docsDeleted.
| private final Object lock = new Object(); |
119 |
| |
120 |
// Field used by the query parser for unqualified terms; empty unless a default is configured.
| private String defaultField = ""; |
121 |
| |
122 |
33
| private void setFile(SimpleFSDirectory f) {
|
123 |
33
| idxFile = f;
|
124 |
| } |
125 |
| |
126 |
95
| private SimpleFSDirectory getFile() {
|
127 |
95
| if (null == idxFile) {
|
128 |
0
| throw new IllegalStateException("Not bound to a file");
|
129 |
| } |
130 |
95
| return idxFile;
|
131 |
| } |
132 |
| |
133 |
125
| public String getIndexStyle() {
|
134 |
125
| return STYLE_FULLTEXT;
|
135 |
| } |
136 |
| |
137 |
| |
138 |
| |
139 |
| |
140 |
| |
141 |
| |
142 |
5847
| public IndexPattern[] getPatterns() {
|
143 |
5847
| return (IndexPattern[]) patterns.keySet().toArray(new IndexPattern[0]);
|
144 |
| } |
145 |
| |
146 |
| |
147 |
| |
148 |
| |
149 |
| |
150 |
| |
151 |
| |
152 |
9
| public String getPatternAlias(IndexPattern pattern) {
|
153 |
9
| if (patterns.containsKey(pattern)) {
|
154 |
0
| return (String) patterns.get(pattern);
|
155 |
| } |
156 |
| |
157 |
9
| int match = 0;
|
158 |
9
| IndexPattern matchPattern = null;
|
159 |
9
| for (Iterator i = patterns.keySet().iterator(); i.hasNext(); ) {
|
160 |
9
| IndexPattern p = (IndexPattern) i.next();
|
161 |
9
| int cMatch = pattern.getMatchLevel(p);
|
162 |
9
| if (cMatch > match) {
|
163 |
9
| match = cMatch;
|
164 |
9
| matchPattern = p;
|
165 |
| } |
166 |
| } |
167 |
| |
168 |
9
| return (String) patterns.get(matchPattern);
|
169 |
| } |
170 |
| |
171 |
| |
172 |
| |
173 |
| |
174 |
| |
175 |
| |
176 |
| |
177 |
| |
178 |
| |
179 |
| |
180 |
| |
181 |
| |
182 |
| |
183 |
| |
184 |
| |
185 |
| |
186 |
| |
187 |
| |
188 |
| |
189 |
| |
190 |
| |
191 |
| |
192 |
| |
193 |
| |
194 |
| |
195 |
| |
196 |
| |
197 |
| |
198 |
| |
199 |
| |
200 |
| |
201 |
| |
202 |
| |
203 |
33
| public void setConfig(Configuration config) throws XindiceException {
|
204 |
33
| this.config = config;
|
205 |
33
| try {
|
206 |
33
| name = config.getAttribute(NAME);
|
207 |
33
| String analyzer = config.getAttribute(ANALYZER);
|
208 |
| |
209 |
33
| String anc = StringUtilities.isBlank(analyzer) ? DEFANALYZER : analyzer;
|
210 |
33
| Class c = Class.forName(anc);
|
211 |
33
| an = (Analyzer) c.newInstance();
|
212 |
| |
213 |
33
| Configuration[] patterns = config.getChildren(PATTERN);
|
214 |
33
| if (patterns.length == 0) {
|
215 |
0
| throw new CannotCreateException("Configuration must have at least one pattern");
|
216 |
| } |
217 |
| |
218 |
33
| for (int i = 0; i < patterns.length; i++) {
|
219 |
61
| String name = patterns[i].getAttribute(PATTERN_STRING);
|
220 |
61
| String alias = patterns[i].getAttribute(PATTERN_ALIAS);
|
221 |
61
| this.patterns.put(new IndexPattern(collection.getSymbols(), name, null), alias);
|
222 |
| } |
223 |
| |
224 |
33
| Configuration[] defaults = config.getChildren(DEFAULT);
|
225 |
33
| if (defaults.length > 1) {
|
226 |
0
| throw new CannotCreateException("There may be only one default field");
|
227 |
33
| } else if (defaults.length == 1) {
|
228 |
2
| String alias = defaults[0].getAttribute(PATTERN_ALIAS);
|
229 |
2
| if (this.patterns.values().contains(alias)) {
|
230 |
2
| defaultField = alias;
|
231 |
| } else { |
232 |
0
| throw new CannotCreateException("Alias '" + alias + "' is undefined in configuration");
|
233 |
| } |
234 |
| } |
235 |
| |
236 |
33
| setFile(new SimpleFSDirectory(new File(collection.getCollectionRoot(), name), null));
|
237 |
| } catch (Exception e) { |
238 |
0
| throw new XindiceException(e);
|
239 |
| } |
240 |
| } |
241 |
| |
242 |
0
| public Configuration getConfig() {
|
243 |
0
| return config;
|
244 |
| } |
245 |
| |
246 |
33
| public boolean exists() throws DBException {
|
247 |
33
| try {
|
248 |
33
| return IndexReader.indexExists(idxFile);
|
249 |
| } catch (IOException e) { |
250 |
0
| throw new IndexerException(FaultCodes.GEN_GENERAL_ERROR, "Error accessing index", e);
|
251 |
| } |
252 |
| } |
253 |
| |
254 |
| |
255 |
| |
256 |
| |
257 |
| |
258 |
| |
259 |
| |
260 |
| |
261 |
| |
262 |
33
| public synchronized boolean create() throws DBException {
|
263 |
33
| if (luceneIndexerFound()) {
|
264 |
1
| throw new DuplicateIndexException("Collection can only have one full text index.");
|
265 |
| } |
266 |
32
| openWrite(true);
|
267 |
32
| return true;
|
268 |
| } |
269 |
| |
270 |
33
| private boolean luceneIndexerFound() throws DBException {
|
271 |
33
| String indexers[] = collection.getIndexManager().list();
|
272 |
33
| for (int i = 0; i < indexers.length; i++) {
|
273 |
1
| Indexer indexer = collection.getIndexer(indexers[i]);
|
274 |
1
| if (indexer instanceof LuceneIndexer) {
|
275 |
1
| return true;
|
276 |
| } |
277 |
| } |
278 |
| |
279 |
32
| return false;
|
280 |
| } |
281 |
| |
282 |
32
| public boolean open() throws DBException {
|
283 |
32
| openWrite(false);
|
284 |
32
| return true;
|
285 |
| } |
286 |
| |
287 |
584
| public boolean isOpened() {
|
288 |
584
| return null != iw;
|
289 |
| } |
290 |
| |
291 |
32
| public synchronized boolean close() throws DBException {
|
292 |
32
| closeWrite();
|
293 |
32
| if (searcher != null) {
|
294 |
30
| searcher.close(true);
|
295 |
| } |
296 |
32
| return true;
|
297 |
| } |
298 |
| |
299 |
32
| public boolean drop() throws DBException {
|
300 |
32
| try {
|
301 |
32
| if (IndexReader.indexExists(idxFile)) {
|
302 |
32
| close();
|
303 |
32
| return deepDelete(getFile().getFile());
|
304 |
| } else { |
305 |
0
| return false;
|
306 |
| } |
307 |
| } catch (IOException e) { |
308 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
309 |
| "Failed to delete index " + name + ", collection " + collection.getCanonicalName(), e); |
310 |
| } |
311 |
| } |
312 |
| |
313 |
130
| public String getName() {
|
314 |
130
| return name;
|
315 |
| } |
316 |
| |
317 |
33
| public void setCollection(Collection collection) {
|
318 |
33
| this.collection = collection;
|
319 |
| } |
320 |
| |
321 |
72
| public Analyzer getAnalyzer() {
|
322 |
72
| return an;
|
323 |
| } |
324 |
| |
325 |
64
| private void openWrite(boolean create) throws DBException {
|
326 |
64
| if (log.isTraceEnabled()) {
|
327 |
0
| log.trace("Calling openWrite(" + create + ")");
|
328 |
| } |
329 |
| |
330 |
64
| try {
|
331 |
64
| if (iw == null) {
|
332 |
32
| iw = new IndexWriter(getFile(), getAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
|
333 |
| } |
334 |
| } catch (IOException e) { |
335 |
0
| if (create) {
|
336 |
0
| throw new DBException(FaultCodes.IDX_CANNOT_CREATE,
|
337 |
| "Failed to cleate index " + name + ", collection " + collection.getCanonicalName(), e); |
338 |
| } else { |
339 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
340 |
| "Failed to open index " + name + ", collection " + collection.getCanonicalName(), e); |
341 |
| } |
342 |
| } |
343 |
| } |
344 |
| |
345 |
584
| private void assertOpen() {
|
346 |
584
| if (!isOpened()) {
|
347 |
0
| throw new IllegalStateException("Index has not been opened");
|
348 |
| } |
349 |
| } |
350 |
| |
351 |
32
| private void closeWrite() throws DBException {
|
352 |
32
| if (null != iw) {
|
353 |
32
| try {
|
354 |
32
| iw.close();
|
355 |
32
| iw = null;
|
356 |
| } catch (IOException e) { |
357 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
358 |
| "Failed to close writer for index " + name + ", collection " + collection.getCanonicalName(), e); |
359 |
| } |
360 |
| } |
361 |
| } |
362 |
| |
363 |
150
| private boolean deepDelete(File f) throws IOException {
|
364 |
150
| if (f.isDirectory()) {
|
365 |
32
| File fl[] = f.listFiles();
|
366 |
32
| for (int i = 0; i < fl.length; i++) {
|
367 |
118
| if (!deepDelete(fl[i])) {
|
368 |
0
| return false;
|
369 |
| } |
370 |
| } |
371 |
| } |
372 |
150
| return f.delete();
|
373 |
| } |
374 |
| |
375 |
285
| public void flush() throws DBException {
|
376 |
285
| try {
|
377 |
285
| assertOpen();
|
378 |
285
| if (iw != null) {
|
379 |
285
| iw.commit();
|
380 |
| |
381 |
285
| int nDocs = iw.maxDoc();
|
382 |
| |
383 |
| |
384 |
| |
385 |
285
| synchronized(lock) {
|
386 |
285
| if (docsAdded > nDocs / 10 || docsAdded > 50 || docsDeleted > 10) {
|
387 |
137
| if (log.isDebugEnabled()) {
|
388 |
0
| log.debug("Optimizing text index for " + collection.getCanonicalName() + "...");
|
389 |
| } |
390 |
| |
391 |
137
| iw.optimize();
|
392 |
137
| docsAdded = 0;
|
393 |
137
| docsDeleted = 0;
|
394 |
| } |
395 |
| } |
396 |
| |
397 |
| } |
398 |
| } catch (IOException e) { |
399 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
400 |
| "Could not force unwritten data to disk for index " + name + ", collection " + collection.getCanonicalName(), e); |
401 |
| } |
402 |
| } |
403 |
| |
404 |
| |
405 |
| |
406 |
| |
407 |
| |
408 |
| |
409 |
| |
410 |
| |
411 |
1049
| public IndexerEventHandler getIndexerEventHandler() {
|
412 |
1049
| return new BasicIndexerEventHandler() {
|
413 |
| Document doc; |
414 |
| |
415 |
922
| public void onDocumentAdded(Key key) throws DBException {
|
416 |
922
| if (doc != null) {
|
417 |
172
| assertOpen();
|
418 |
| |
419 |
172
| try {
|
420 |
172
| iw.addDocument(doc);
|
421 |
172
| synchronized(lock) {
|
422 |
172
| docsAdded++;
|
423 |
| } |
424 |
| } catch (IOException e) { |
425 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
426 |
| "Failed to add document to the index " + name + ", collection " + collection.getCanonicalName(), e); |
427 |
| } |
428 |
| } |
429 |
| } |
430 |
| |
431 |
127
| public void onDocumentDeleted(Key key) throws DBException {
|
432 |
127
| assertOpen();
|
433 |
| |
434 |
127
| try {
|
435 |
127
| iw.deleteDocuments(new Term(KEYNAME, key.toString()));
|
436 |
127
| synchronized(lock) {
|
437 |
127
| docsDeleted++;
|
438 |
| } |
439 |
| } catch (IOException e) { |
440 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
441 |
| "Failed to delete document from the index " + name + ", collection " + collection.getCanonicalName(), e); |
442 |
| } |
443 |
| } |
444 |
| |
445 |
370
| public void onValueAdded(IndexPattern pattern, String value, Key key, int pos, int len, short elemID, short attrID) {
|
446 |
370
| if (doc == null) {
|
447 |
172
| doc = new Document();
|
448 |
172
| doc.add(new Field(KEYNAME, key.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
|
449 |
| } |
450 |
| |
451 |
370
| String field = (String) patterns.get(pattern);
|
452 |
370
| doc.add(new Field(field, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
453 |
| } |
454 |
| }; |
455 |
| } |
456 |
| |
457 |
6
| public IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
|
458 |
| |
459 |
6
| if (query.getOperator() != IndexQuery.TQ) {
|
460 |
0
| return null;
|
461 |
| } |
462 |
| |
463 |
6
| String textQuery = query.getValue(0).toString();
|
464 |
6
| try {
|
465 |
6
| return queryMatches(new QueryParser(defaultField, getAnalyzer()).parse(textQuery));
|
466 |
| } catch (ParseException e) { |
467 |
0
| throw new CompilationException("Failed to parse query '" + textQuery + "'", e);
|
468 |
| } |
469 |
| } |
470 |
| |
471 |
| |
472 |
| |
473 |
| |
474 |
| |
475 |
| |
476 |
| |
477 |
| |
478 |
| |
479 |
40
| public IndexMatch[] queryMatches(Query query) throws DBException {
|
480 |
40
| IndexMatch[] matches = null;
|
481 |
40
| Searcher searcher = getSearcher();
|
482 |
| |
483 |
40
| try {
|
484 |
40
| TopDocs docs = searcher.is.search(query, searcher.ir.numDocs());
|
485 |
40
| matches = new IndexMatch[docs.scoreDocs.length];
|
486 |
| |
487 |
40
| for (int i = 0; i < docs.scoreDocs.length; i++) {
|
488 |
85
| int doc = docs.scoreDocs[i].doc;
|
489 |
85
| Key key = new Key(searcher.ir.document(doc).getField(KEYNAME).stringValue());
|
490 |
85
| matches[i] = new IndexMatch(key, -1, -1);
|
491 |
| } |
492 |
| } catch (IOException e) { |
493 |
0
| throw new ProcessingException("Failed to process a query", e);
|
494 |
| } finally { |
495 |
40
| searcher.free();
|
496 |
| } |
497 |
| |
498 |
40
| return matches;
|
499 |
| } |
500 |
| |
501 |
| |
502 |
| |
503 |
| |
504 |
| |
505 |
| |
506 |
| |
507 |
| |
508 |
| |
509 |
| |
510 |
40
| private synchronized Searcher getSearcher() throws DBException {
|
511 |
| |
512 |
40
| if (searcher != null && !searcher.isCurrent()) {
|
513 |
1
| searcher.close(false);
|
514 |
1
| searcher = null;
|
515 |
| } |
516 |
| |
517 |
40
| if (searcher == null) {
|
518 |
31
| searcher = new Searcher();
|
519 |
| } else { |
520 |
9
| searcher.incRef();
|
521 |
| } |
522 |
| |
523 |
40
| return searcher;
|
524 |
| } |
525 |
| |
526 |
| private class Searcher { |
527 |
| private IndexReader ir; |
528 |
| private IndexSearcher is; |
529 |
| |
530 |
| |
531 |
| private int ref = 1; |
532 |
| |
533 |
31
| public Searcher() throws DBException {
|
534 |
31
| try {
|
535 |
31
| ir = IndexReader.open(getFile(), true);
|
536 |
31
| is = new IndexSearcher(ir);
|
537 |
| } catch (IOException e) { |
538 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
539 |
| "Failed to open access " + name + ", collection " + collection.getCanonicalName(), e); |
540 |
| } |
541 |
| } |
542 |
| |
543 |
10
| public boolean isCurrent() throws DBException {
|
544 |
10
| try {
|
545 |
10
| return ir.isCurrent();
|
546 |
| } catch (IOException e) { |
547 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
548 |
| "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e); |
549 |
| } |
550 |
| } |
551 |
| |
552 |
9
| public void incRef() {
|
553 |
9
| ref++;
|
554 |
| } |
555 |
| |
556 |
| |
557 |
| |
558 |
| |
559 |
| |
560 |
| |
561 |
| |
562 |
| |
563 |
| |
564 |
| |
565 |
| |
566 |
40
| public void free() throws DBException {
|
567 |
40
| synchronized (LuceneIndexer.this) {
|
568 |
40
| ref--;
|
569 |
| |
570 |
40
| if (searcher != this) {
|
571 |
0
| close(false);
|
572 |
| } |
573 |
| } |
574 |
| } |
575 |
| |
576 |
| |
577 |
| |
578 |
| |
579 |
| |
580 |
| |
581 |
| |
582 |
31
| public void close(boolean force) throws DBException {
|
583 |
31
| try {
|
584 |
31
| if (ref == 0 || force) {
|
585 |
31
| is.close();
|
586 |
31
| ir.close();
|
587 |
| } |
588 |
| } catch (IOException e) { |
589 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
590 |
| "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e); |
591 |
| } |
592 |
| } |
593 |
| |
594 |
| |
595 |
| |
596 |
| |
597 |
| |
598 |
| |
599 |
| |
600 |
| |
601 |
0
| public TopDocs search(Query query) throws DBException {
|
602 |
0
| try {
|
603 |
0
| return is.search(query, ir.numDocs());
|
604 |
| } catch (IOException e) { |
605 |
0
| throw new DBException(FaultCodes.IDX_CORRUPTED,
|
606 |
| "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e); |
607 |
| } |
608 |
| } |
609 |
| } |
610 |
| } |