Clover coverage report
Coverage timestamp: Sun Nov 1 2009 23:08:24 UTC

File stats:   LOC: 610   NCLOC: 375   Methods: 35   Classes: 2

Source file           Conditionals   Statements   Methods   TOTAL
LuceneIndexer.java    72.1%          81.7%        94.3%     80.9%
 1    /*
 2    * Licensed to the Apache Software Foundation (ASF) under one or more
 3    * contributor license agreements. See the NOTICE file distributed with
 4    * this work for additional information regarding copyright ownership.
 5    * The ASF licenses this file to You under the Apache License, Version 2.0
 6    * (the "License"); you may not use this file except in compliance with
 7    * the License. You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    *
 17    * $Id: LuceneIndexer.java 821662 2009-10-05 02:09:28Z natalia $
 18    */
 19   
 20    package org.apache.xindice.core.indexer;
 21   
 22    import org.apache.commons.logging.Log;
 23    import org.apache.commons.logging.LogFactory;
 24    import org.apache.lucene.analysis.Analyzer;
 25    import org.apache.lucene.document.Document;
 26    import org.apache.lucene.document.Field;
 27    import org.apache.lucene.index.IndexReader;
 28    import org.apache.lucene.index.IndexWriter;
 29    import org.apache.lucene.index.Term;
 30    import org.apache.lucene.queryParser.ParseException;
 31    import org.apache.lucene.queryParser.QueryParser;
 32    import org.apache.lucene.search.*;
 33    import org.apache.lucene.store.SimpleFSDirectory;
 34    import org.apache.xindice.core.Collection;
 35    import org.apache.xindice.core.DBException;
 36    import org.apache.xindice.core.DBObject;
 37    import org.apache.xindice.core.FaultCodes;
 38    import org.apache.xindice.core.data.Key;
 39    import org.apache.xindice.core.query.CompilationException;
 40    import org.apache.xindice.core.query.ProcessingException;
 41    import org.apache.xindice.util.Configuration;
 42    import org.apache.xindice.util.StringUtilities;
 43    import org.apache.xindice.util.XindiceException;
 44   
 45    import java.io.File;
 46    import java.io.IOException;
 47    import java.util.HashMap;
 48    import java.util.Iterator;
 49   
 50    /**
 51    * LuceneIndexer is used for maintaining full text indexes. It operates on
 52    * documents instead of elements and allows searching for documents using a
 53    * native Lucene query. There can be only one LuceneIndexer per collection;
 54    * however, it may have more than one IndexPattern.<p>
 55    *
 56    * Every IndexPattern corresponds to a Lucene document field. For every Xindice
 57    * document, the values of all matching elements are indexed in a single Lucene
 58    * document, allowing searches across the patterns.</p><p>
 59    *
 60    * Sample LuceneIndexer configuration:
 61    * <pre>
 62    * &lt;index name='fulltext' class='org.apache.xindice.core.indexer.LuceneIndexer'
 63    * analyzer='org.apache.lucene.analysis.SimpleAnalyzer'&gt;
 64    * &lt;pattern pattern='meta@title' alias='title'/&gt;
 65    * &lt;pattern pattern='description' alias='text'/&gt;
 66    * &lt;/index&gt;</pre></p><p>
 67    *
 68    * To search over this sample index, one could issue a query <code>"title:tutorial
 69    * AND text:xml"</code>.</p><p>
 70    *
 71    * For more details about LuceneIndexer configuration, please see the documentation for
 72    * {@link #setConfig(org.apache.xindice.util.Configuration)}
 73    * </p>
 74    *
 75    * @author Andy Armstrong
 76    * @version $Revision: 821662 $, $Date: 2009-10-05 02:09:28 +0000 (Mon, 05 Oct 2009) $
 77    */
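/*
 * A rough usage sketch, assuming the sample configuration above and that the
 * indexer is reached through its parent collection. queryMatches(Query) and
 * getAnalyzer() are defined below; IndexMatch.getKey() is assumed here for
 * reading the key of a matching document, and ParseException handling is omitted.
 *
 *   LuceneIndexer indexer = (LuceneIndexer) collection.getIndexer("fulltext");
 *   Query query = new QueryParser("title", indexer.getAnalyzer())
 *           .parse("title:tutorial AND text:xml");
 *   IndexMatch[] matches = indexer.queryMatches(query);
 *   for (int i = 0; i < matches.length; i++) {
 *       Key key = matches[i].getKey();  // key of a matching Xindice document
 *   }
 */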
 78    public final class LuceneIndexer implements Indexer, DBObject {
 79   
 80    private static final Log log = LogFactory.getLog(LuceneIndexer.class);
 81   
 82    private static final String NAME = "name";
 83    private static final String PATTERN = "pattern";
 84    private static final String DEFAULT = "default";
 85    private static final String ANALYZER = "analyzer";
 86    private static final String PATTERN_STRING = "pattern";
 87    private static final String PATTERN_ALIAS = "alias";
 88   
 89    public static final String KEYNAME = "key";
 90   
 91    // Default analyzer to use
 92    public static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer";
 93   
 94    private SimpleFSDirectory idxFile;
 95    private IndexWriter iw;
 96    private Analyzer an;
 97   
 98    /**
 99    * Most recently opened searcher. The same Searcher instance is used for
 100    * all searches unless the index has changed and a new Searcher
 101    * is required to see the changes.
 102    *
 103    * A Searcher cannot be closed while it is in use (while a query is in
 104    * progress or hits are being iterated).
 105    */
 106    private Searcher searcher;
 107   
 108    private Configuration config;
 109    private Collection collection;
 110   
 111    private String name;
 112    private HashMap patterns = new HashMap();
 113   
 114    // Keep a count of changes to the index
 115    private int docsAdded;
 116    private int docsDeleted;
 117   
 118    private final Object lock = new Object();
 119   
 120    private String defaultField = "";
 121   
 122  33 private void setFile(SimpleFSDirectory f) {
 123  33 idxFile = f;
 124    }
 125   
 126  95 private SimpleFSDirectory getFile() {
 127  95 if (null == idxFile) {
 128  0 throw new IllegalStateException("Not bound to a file");
 129    }
 130  95 return idxFile;
 131    }
 132   
 133  125 public String getIndexStyle() {
 134  125 return STYLE_FULLTEXT;
 135    }
 136   
 137    /**
 138    * Returns this Indexer's patterns. LuceneIndexer may have more than one
 139    * pattern.
 140    * @return Indexer's patterns
 141    */
 142  5847 public IndexPattern[] getPatterns() {
 143  5847 return (IndexPattern[]) patterns.keySet().toArray(new IndexPattern[0]);
 144    }
 145   
 146    /**
 147    * Returns the alias for the given pattern. If this exact pattern is not indexed,
 148    * the method will look for the closest matching indexed pattern.
 149    * @param pattern IndexPattern
 150    * @return Alias for the closest matching pattern or null, if there is none
 151    */
 152  9 public String getPatternAlias(IndexPattern pattern) {
 153  9 if (patterns.containsKey(pattern)) {
 154  0 return (String) patterns.get(pattern);
 155    }
 156   
 157  9 int match = 0;
 158  9 IndexPattern matchPattern = null;
 159  9 for (Iterator i = patterns.keySet().iterator(); i.hasNext(); ) {
 160  9 IndexPattern p = (IndexPattern) i.next();
 161  9 int cMatch = pattern.getMatchLevel(p);
 162  9 if (cMatch > match) {
 163  9 match = cMatch;
 164  9 matchPattern = p;
 165    }
 166    }
 167   
 168  9 return (String) patterns.get(matchPattern);
 169    }
 170   
 171    /**
 172    * Configures LuceneIndexer instance.
 173    * <dl>
 174    * <dt>index
 175    * <dd>Top Indexer configuration element. Can have one or more pattern
 176    * child elements. Its attributes:
 177    *
 178    * <ul><li>name - Indexer name. Required.
 179    * <li>class - Indexer class. Required.
 180    * Use org.apache.xindice.core.indexer.LuceneIndexer for a full text index.
 181    * <li>analyzer - Analyzer to use for indexing. Optional,
 182    * org.apache.lucene.analysis.SimpleAnalyzer by default.</ul>
 183    *
 184    * <dl><dt>pattern
 185    * <dd>Child element. The Indexer must have at least one pattern. Its
 186    * attributes:
 187    * <ul><li>pattern - IndexPattern. For acceptable formats, see
 188    * {@link org.apache.xindice.core.indexer.Indexer#getPatterns()}
 189    * <li>alias - Name of the field used to store/search values for that pattern.
 190    * </ul></dl>
 191    * <dl><dt>default
 192    * <dd>Child element. Optional. Its attributes:
 193    * <ul><li>alias - The pattern alias to use as the default field for
 194    * searches. If omitted, there is no default field and every term in a
 195    * search query has to include a field name.
 196    * </ul></dl>
 197    * </dl>
 198    *
 199    * @param config Configuration to apply
 200    * @throws XindiceException The configuration is missing required information,
 201    * or the Analyzer could not be instantiated.
 202    */
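/*
 * A configuration sketch for the optional 'default' element described above,
 * reusing the aliases from the class-level example; the element and attribute
 * names are assumed to follow the DEFAULT and PATTERN_ALIAS constants handled
 * in the method below.
 *
 *   <index name='fulltext' class='org.apache.xindice.core.indexer.LuceneIndexer'>
 *     <pattern pattern='meta@title' alias='title'/>
 *     <pattern pattern='description' alias='text'/>
 *     <default alias='text'/>
 *   </index>
 *
 * With this default in place, a query such as "xml tutorial" is parsed against
 * the 'text' field, so terms do not need an explicit field prefix.
 */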
 203  33 public void setConfig(Configuration config) throws XindiceException {
 204  33 this.config = config;
 205  33 try {
 206  33 name = config.getAttribute(NAME);
 207  33 String analyzer = config.getAttribute(ANALYZER);
 208   
 209  33 String anc = StringUtilities.isBlank(analyzer) ? DEFANALYZER : analyzer;
 210  33 Class c = Class.forName(anc);
 211  33 an = (Analyzer) c.newInstance();
 212   
 213  33 Configuration[] patterns = config.getChildren(PATTERN);
 214  33 if (patterns.length == 0) {
 215  0 throw new CannotCreateException("Configuration must have at least one pattern");
 216    }
 217   
 218  33 for (int i = 0; i < patterns.length; i++) {
 219  61 String name = patterns[i].getAttribute(PATTERN_STRING);
 220  61 String alias = patterns[i].getAttribute(PATTERN_ALIAS);
 221  61 this.patterns.put(new IndexPattern(collection.getSymbols(), name, null), alias);
 222    }
 223   
 224  33 Configuration[] defaults = config.getChildren(DEFAULT);
 225  33 if (defaults.length > 1) {
 226  0 throw new CannotCreateException("There may be only one default field");
 227  33 } else if (defaults.length == 1) {
 228  2 String alias = defaults[0].getAttribute(PATTERN_ALIAS);
 229  2 if (this.patterns.values().contains(alias)) {
 230  2 defaultField = alias;
 231    } else {
 232  0 throw new CannotCreateException("Alias '" + alias + "' is undefined in configuration");
 233    }
 234    }
 235   
 236  33 setFile(new SimpleFSDirectory(new File(collection.getCollectionRoot(), name), null));
 237    } catch (Exception e) {
 238  0 throw new XindiceException(e);
 239    }
 240    }
 241   
 242  0 public Configuration getConfig() {
 243  0 return config;
 244    }
 245   
 246  33 public boolean exists() throws DBException {
 247  33 try {
 248  33 return IndexReader.indexExists(idxFile);
 249    } catch (IOException e) {
 250  0 throw new IndexerException(FaultCodes.GEN_GENERAL_ERROR, "Error accessing index", e);
 251    }
 252    }
 253   
 254    /**
 255    * Creates necessary resources.
 256    *
 257    * @return true, if successful
 258    * @throws DBException A low-level IOException prevented the index
 259    * from creating its resources.
 260    * @throws DuplicateIndexException The parent collection already has a full text index
 261    */
 262  33 public synchronized boolean create() throws DBException {
 263  33 if (luceneIndexerFound()) {
 264  1 throw new DuplicateIndexException("Collection can only have one full text index.");
 265    }
 266  32 openWrite(true);
 267  32 return true;
 268    }
 269   
 270  33 private boolean luceneIndexerFound() throws DBException {
 271  33 String indexers[] = collection.getIndexManager().list();
 272  33 for (int i = 0; i < indexers.length; i++) {
 273  1 Indexer indexer = collection.getIndexer(indexers[i]);
 274  1 if (indexer instanceof LuceneIndexer) {
 275  1 return true;
 276    }
 277    }
 278   
 279  32 return false;
 280    }
 281   
 282  32 public boolean open() throws DBException {
 283  32 openWrite(false);
 284  32 return true;
 285    }
 286   
 287  584 public boolean isOpened() {
 288  584 return null != iw;
 289    }
 290   
 291  32 public synchronized boolean close() throws DBException {
 292  32 closeWrite();
 293  32 if (searcher != null) {
 294  30 searcher.close(true);
 295    }
 296  32 return true;
 297    }
 298   
 299  32 public boolean drop() throws DBException {
 300  32 try {
 301  32 if (IndexReader.indexExists(idxFile)) {
 302  32 close();
 303  32 return deepDelete(getFile().getFile());
 304    } else {
 305  0 return false;
 306    }
 307    } catch (IOException e) {
 308  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 309    "Failed to delete index " + name + ", collection " + collection.getCanonicalName(), e);
 310    }
 311    }
 312   
 313  130 public String getName() {
 314  130 return name;
 315    }
 316   
 317  33 public void setCollection(Collection collection) {
 318  33 this.collection = collection;
 319    }
 320   
 321  72 public Analyzer getAnalyzer() {
 322  72 return an;
 323    }
 324   
 325  64 private void openWrite(boolean create) throws DBException {
 326  64 if (log.isTraceEnabled()) {
 327  0 log.trace("Calling openWrite(" + create + ")");
 328    }
 329   
 330  64 try {
 331  64 if (iw == null) {
 332  32 iw = new IndexWriter(getFile(), getAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
 333    }
 334    } catch (IOException e) {
 335  0 if (create) {
 336  0 throw new DBException(FaultCodes.IDX_CANNOT_CREATE,
 337    "Failed to cleate index " + name + ", collection " + collection.getCanonicalName(), e);
 338    } else {
 339  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 340    "Failed to open index " + name + ", collection " + collection.getCanonicalName(), e);
 341    }
 342    }
 343    }
 344   
 345  584 private void assertOpen() {
 346  584 if (!isOpened()) {
 347  0 throw new IllegalStateException("Index has not been opened");
 348    }
 349    }
 350   
 351  32 private void closeWrite() throws DBException {
 352  32 if (null != iw) {
 353  32 try {
 354  32 iw.close();
 355  32 iw = null;
 356    } catch (IOException e) {
 357  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 358    "Failed to close writer for index " + name + ", collection " + collection.getCanonicalName(), e);
 359    }
 360    }
 361    }
 362   
 363  150 private boolean deepDelete(File f) throws IOException {
 364  150 if (f.isDirectory()) {
 365  32 File fl[] = f.listFiles();
 366  32 for (int i = 0; i < fl.length; i++) {
 367  118 if (!deepDelete(fl[i])) {
 368  0 return false;
 369    }
 370    }
 371    }
 372  150 return f.delete();
 373    }
 374   
 375  285 public void flush() throws DBException {
 376  285 try {
 377  285 assertOpen();
 378  285 if (iw != null) {
 379  285 iw.commit();
 380   
 381  285 int nDocs = iw.maxDoc();
 382    /* Fairly arbitrary rules for triggering index optimisation. Need to
 383    * play with these.
 384    */
 385  285 synchronized(lock) {
 386  285 if (docsAdded > nDocs / 10 || docsAdded > 50 || docsDeleted > 10) {
 387  137 if (log.isDebugEnabled()) {
 388  0 log.debug("Optimizing text index for " + collection.getCanonicalName() + "...");
 389    }
 390   
 391  137 iw.optimize();
 392  137 docsAdded = 0;
 393  137 docsDeleted = 0;
 394    }
 395    }
 396   
 397    }
 398    } catch (IOException e) {
 399  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 400    "Could not force unwritten data to disk for index " + name + ", collection " + collection.getCanonicalName(), e);
 401    }
 402    }
 403   
 404    /**
 405    * Creates a new instance of a handler to listen to indexer events. For
 406    * every document being added there will be a separate handler
 407    * that assembles all relevant values into a single Lucene document.
 408    *
 409    * @return new instance of IndexerEventHandler
 410    */
 411  1049 public IndexerEventHandler getIndexerEventHandler() {
 412  1049 return new BasicIndexerEventHandler() {
 413    Document doc;
 414   
 415  922 public void onDocumentAdded(Key key) throws DBException {
 416  922 if (doc != null) {
 417  172 assertOpen();
 418   
 419  172 try {
 420  172 iw.addDocument(doc);
 421  172 synchronized(lock) {
 422  172 docsAdded++;
 423    }
 424    } catch (IOException e) {
 425  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 426    "Failed to add document to the index " + name + ", collection " + collection.getCanonicalName(), e);
 427    }
 428    }
 429    }
 430   
 431  127 public void onDocumentDeleted(Key key) throws DBException {
 432  127 assertOpen();
 433   
 434  127 try {
 435  127 iw.deleteDocuments(new Term(KEYNAME, key.toString()));
 436  127 synchronized(lock) {
 437  127 docsDeleted++;
 438    }
 439    } catch (IOException e) {
 440  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 441    "Failed to delete document from the index " + name + ", collection " + collection.getCanonicalName(), e);
 442    }
 443    }
 444   
 445  370 public void onValueAdded(IndexPattern pattern, String value, Key key, int pos, int len, short elemID, short attrID) {
 446  370 if (doc == null) {
 447  172 doc = new Document();
 448  172 doc.add(new Field(KEYNAME, key.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
 449    }
 450   
 451  370 String field = (String) patterns.get(pattern);
 452  370 doc.add(new Field(field, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
 453    }
 454    };
 455    }
 456   
 457  6 public IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
 458    // this indexer only supports text queries
 459  6 if (query.getOperator() != IndexQuery.TQ) {
 460  0 return null;
 461    }
 462   
 463  6 String textQuery = query.getValue(0).toString();
 464  6 try {
 465  6 return queryMatches(new QueryParser(defaultField, getAnalyzer()).parse(textQuery));
 466    } catch (ParseException e) {
 467  0 throw new CompilationException("Failed to parse query '" + textQuery + "'", e);
 468    }
 469    }
 470   
 471    /**
 472    * Same as {@link Indexer#queryMatches(IndexQuery)}, but accepts a compiled Lucene query as
 473    * its parameter.
 474    *
 475    * @param query Compiled Lucene query.
 476    * @return The resulting matches
 477    * @throws DBException if an IOException prevented the indexer from executing the query.
 478    */
 479  40 public IndexMatch[] queryMatches(Query query) throws DBException {
 480  40 IndexMatch[] matches = null;
 481  40 Searcher searcher = getSearcher();
 482   
 483  40 try {
 484  40 TopDocs docs = searcher.is.search(query, searcher.ir.numDocs());
 485  40 matches = new IndexMatch[docs.scoreDocs.length];
 486   
 487  40 for (int i = 0; i < docs.scoreDocs.length; i++) {
 488  85 int doc = docs.scoreDocs[i].doc;
 489  85 Key key = new Key(searcher.ir.document(doc).getField(KEYNAME).stringValue());
 490  85 matches[i] = new IndexMatch(key, -1, -1);
 491    }
 492    } catch (IOException e) {
 493  0 throw new ProcessingException("Failed to process a query", e);
 494    } finally {
 495  40 searcher.free();
 496    }
 497   
 498  40 return matches;
 499    }
 500   
 501    /**
 502    * Returns a Searcher that uses the current version of the index.
 503    * If the index has been modified since the last time a searcher was requested,
 504    * this method will create a new Searcher instance; otherwise it will
 505    * return the Searcher instance it created previously.
 506    *
 507    * @return current Searcher
 508    * @throws DBException If index could not be accessed
 509    */
 510  40 private synchronized Searcher getSearcher() throws DBException {
 511   
 512  40 if (searcher != null && !searcher.isCurrent()) {
 513  1 searcher.close(false);
 514  1 searcher = null;
 515    }
 516   
 517  40 if (searcher == null) {
 518  31 searcher = new Searcher();
 519    } else {
 520  9 searcher.incRef();
 521    }
 522   
 523  40 return searcher;
 524    }
 525   
 526    private class Searcher {
 527    private IndexReader ir;
 528    private IndexSearcher is;
 529   
 530    // number of searches in progress using that searcher
 531    private int ref = 1;
 532   
 533  31 public Searcher() throws DBException {
 534  31 try {
 535  31 ir = IndexReader.open(getFile(), true);
 536  31 is = new IndexSearcher(ir);
 537    } catch (IOException e) {
 538  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 539    "Failed to open access " + name + ", collection " + collection.getCanonicalName(), e);
 540    }
 541    }
 542   
 543  10 public boolean isCurrent() throws DBException {
 544  10 try {
 545  10 return ir.isCurrent();
 546    } catch (IOException e) {
 547  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 548    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 549    }
 550    }
 551   
 552  9 public void incRef() {
 553  9 ref++;
 554    }
 555   
 556    /**
 557    * This method must be called after executing a text query to clean up
 558    * resources that are no longer in use. It decrements the number of
 559    * searches referencing this searcher and then attempts to close it
 560    * unless it is the most recently opened searcher. If no searcher was
 561    * opened after this one, the searcher will be kept open
 562    * for future use, even if it is not in use at the moment.
 563    *
 564    * @throws DBException if there was IOException
 565    */
 566  40 public void free() throws DBException {
 567  40 synchronized (LuceneIndexer.this) {
 568  40 ref--;
 569   
 570  40 if (searcher != this) {
 571  0 close(false);
 572    }
 573    }
 574    }
 575   
 576    /**
 577    * Closes the searcher if it is not used in any search.
 578    *
 579    * @param force true if the searcher has to be closed even if it is in use
 580    * @throws DBException if there was IOException
 581    */
 582  31 public void close(boolean force) throws DBException {
 583  31 try {
 584  31 if (ref == 0 || force) {
 585  31 is.close();
 586  31 ir.close();
 587    }
 588    } catch (IOException e) {
 589  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 590    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 591    }
 592    }
 593   
 594    /**
 595    * Internal search method.
 596    * @param query Search query
 597    * @return TopDocs for the search
 598    * @throws DBException Index could not be accessed
 599    * @deprecated Deprecated following Lucene changes
 600    */
 601  0 public TopDocs search(Query query) throws DBException {
 602  0 try {
 603  0 return is.search(query, ir.numDocs());
 604    } catch (IOException e) {
 605  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 606    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 607    }
 608    }
 609    }
 610    }