Clover coverage report
Coverage timestamp: Sun Nov 1 2009 23:08:24 UTC

File stats:   LOC: 610   NCLOC: 375   Methods: 35   Classes: 2

Source file           Conditionals   Statements   Methods   TOTAL
LuceneIndexer.java    72.1%          81.7%        94.3%     80.9%
 1    /*
 2    * Licensed to the Apache Software Foundation (ASF) under one or more
 3    * contributor license agreements. See the NOTICE file distributed with
 4    * this work for additional information regarding copyright ownership.
 5    * The ASF licenses this file to You under the Apache License, Version 2.0
 6    * (the "License"); you may not use this file except in compliance with
 7    * the License. You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    *
 17    * $Id: LuceneIndexer.java 821662 2009-10-05 02:09:28Z natalia $
 18    */
 19   
 20    package org.apache.xindice.core.indexer;
 21   
 22    import org.apache.commons.logging.Log;
 23    import org.apache.commons.logging.LogFactory;
 24    import org.apache.lucene.analysis.Analyzer;
 25    import org.apache.lucene.document.Document;
 26    import org.apache.lucene.document.Field;
 27    import org.apache.lucene.index.IndexReader;
 28    import org.apache.lucene.index.IndexWriter;
 29    import org.apache.lucene.index.Term;
 30    import org.apache.lucene.queryParser.ParseException;
 31    import org.apache.lucene.queryParser.QueryParser;
 32    import org.apache.lucene.search.*;
 33    import org.apache.lucene.store.SimpleFSDirectory;
 34    import org.apache.xindice.core.Collection;
 35    import org.apache.xindice.core.DBException;
 36    import org.apache.xindice.core.DBObject;
 37    import org.apache.xindice.core.FaultCodes;
 38    import org.apache.xindice.core.data.Key;
 39    import org.apache.xindice.core.query.CompilationException;
 40    import org.apache.xindice.core.query.ProcessingException;
 41    import org.apache.xindice.util.Configuration;
 42    import org.apache.xindice.util.StringUtilities;
 43    import org.apache.xindice.util.XindiceException;
 44   
 45    import java.io.File;
 46    import java.io.IOException;
 47    import java.util.HashMap;
 48    import java.util.Iterator;
 49   
 50    /**
 51    * LuceneIndexer is used for maintaining full text indexes. It operates on
 52    * documents instead of elements and allows searching for documents using a
 53    * native Lucene query. There can be only one LuceneIndexer per collection;
 54    * however, it may have more than one IndexPattern.<p>
 55    *
 56    * Every IndexPattern corresponds to a Lucene document field. For every Xindice
 57    * document, the values of all matching elements are indexed in a single Lucene
 58    * document, allowing searches across the patterns.</p><p>
 59    *
 60    * Sample LuceneIndexer configuration:
 61    * <pre>
 62    * &lt;index name='fulltext' class='org.apache.xindice.core.indexer.LuceneIndexer'
 63    * analyzer='org.apache.lucene.analysis.SimpleAnalyzer'&gt;
 64    * &lt;pattern pattern='meta@title' alias='title'/&gt;
 65    * &lt;pattern pattern='description' alias='text'/&gt;
 66    * &lt;/index&gt;</pre></p><p>
 67    *
 68    * To search over this sample index, one could issue a query <code>"title:tutorial
 69    * AND text:xml"</code>.</p><p>
 70    *
 71    * For more details about LuceneIndexer configuration, please see the documentation for
 72    * {@link #setConfig(org.apache.xindice.util.Configuration)}
 73    * </p>
 74    *
 75    * @author Andy Armstrong
 76    * @version $Revision: 821662 $, $Date: 2009-10-05 02:09:28 +0000 (Mon, 05 Oct 2009) $
 77    */
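/*
 * A rough usage sketch, assuming the sample configuration above and that the
 * indexer is reached through its parent collection. queryMatches(Query) and
 * getAnalyzer() are defined below; IndexMatch.getKey() is assumed here for
 * reading the key of a matching document, and ParseException handling is omitted.
 *
 *   LuceneIndexer indexer = (LuceneIndexer) collection.getIndexer("fulltext");
 *   Query query = new QueryParser("title", indexer.getAnalyzer())
 *           .parse("title:tutorial AND text:xml");
 *   IndexMatch[] matches = indexer.queryMatches(query);
 *   for (int i = 0; i < matches.length; i++) {
 *       Key key = matches[i].getKey();  // key of a matching Xindice document
 *   }
 */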
 78    public final class LuceneIndexer implements Indexer, DBObject {
 79   
 80    private static final Log log = LogFactory.getLog(LuceneIndexer.class);
 81   
 82    private static final String NAME = "name";
 83    private static final String PATTERN = "pattern";
 84    private static final String DEFAULT = "default";
 85    private static final String ANALYZER = "analyzer";
 86    private static final String PATTERN_STRING = "pattern";
 87    private static final String PATTERN_ALIAS = "alias";
 88   
 89    public static final String KEYNAME = "key";
 90   
 91    // Default analyzer to use
 92    public static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer";
 93   
 94    private SimpleFSDirectory idxFile;
 95    private IndexWriter iw;
 96    private Analyzer an;
 97   
 98    /**
 99    * Most recently opened searcher. The same Searcher instance is used for
 100    * all searches unless the index has changed and a new Searcher
 101    * is required to see the changes.
 102    *
 103    * A Searcher cannot be closed while it is in use (while a query is in
 104    * progress or hits are being iterated).
 105    */
 106    private Searcher searcher;
 107   
 108    private Configuration config;
 109    private Collection collection;
 110   
 111    private String name;
 112    private HashMap patterns = new HashMap();
 113   
 114    // Keep a count of changes to the index
 115    private int docsAdded;
 116    private int docsDeleted;
 117   
 118    private final Object lock = new Object();
 119   
 120    private String defaultField = "";
 121   
 122  33 private void setFile(SimpleFSDirectory f) {
 123  33 idxFile = f;
 124    }
 125   
 126  95 private SimpleFSDirectory getFile() {
 127  95 if (null == idxFile) {
 128  0 throw new IllegalStateException("Not bound to a file");
 129    }
 130  95 return idxFile;
 131    }
 132   
 133  125 public String getIndexStyle() {
 134  125 return STYLE_FULLTEXT;
 135    }
 136   
 137    /**
 138    * Returns this Indexer's patterns. LuceneIndexer may have more than one
 139    * pattern.
 140    * @return Indexer's patterns
 141    */
 142  5847 public IndexPattern[] getPatterns() {
 143  5847 return (IndexPattern[]) patterns.keySet().toArray(new IndexPattern[0]);
 144    }
 145   
 146    /**
 147    * Returns the alias for the given pattern. If this exact pattern is not indexed,
 148    * the method will look for the closest matching indexed pattern.
 149    * @param pattern IndexPattern
 150    * @return Alias for the closest matching pattern or null, if there is none
 151    */
 152  9 public String getPatternAlias(IndexPattern pattern) {
 153  9 if (patterns.containsKey(pattern)) {
 154  0 return (String) patterns.get(pattern);
 155    }
 156   
 157  9 int match = 0;
 158  9 IndexPattern matchPattern = null;
 159  9 for (Iterator i = patterns.keySet().iterator(); i.hasNext(); ) {
 160  9 IndexPattern p = (IndexPattern) i.next();
 161  9 int cMatch = pattern.getMatchLevel(p);
 162  9 if (cMatch > match) {
 163  9 match = cMatch;
 164  9 matchPattern = p;
 165    }
 166    }
 167   
 168  9 return (String) patterns.get(matchPattern);
 169    }
 170   
 171    /**
 172    * Configures LuceneIndexer instance.
 173    * <dl>
 174    * <dt>index
 175    * <dd>Top Indexer configuration element. Can have one or more pattern
 176    * child elements. Its attributes:
 177    *
 178    * <ul><li>name - Indexer name. Required.
 179    * <li>class - Indexer class. Required.
 180    * Use org.apache.xindice.core.indexer.LuceneIndexer for a full text index.
 181    * <li>analyzer - Analyzer to use for indexing. Optional,
 182    * org.apache.lucene.analysis.SimpleAnalyzer by default.</ul>
 183    *
 184    * <dl><dt>pattern
 185    * <dd>Child element. The Indexer must have at least one pattern. Its
 186    * attributes:
 187    * <ul><li>pattern - IndexPattern. For acceptable formats, see
 188    * {@link org.apache.xindice.core.indexer.Indexer#getPatterns()}
 189    * <li>alias - Name of the field used to store/search values for that pattern.
 190    * </ul></dl>
 191    * <dl><dt>default
 192    * <dd>Child element. Optional. Its attributes:
 193    * <ul><li>alias - The pattern alias to use as the default field for
 194    * searches. If omitted, there is no default field and every term in a
 195    * search query has to include a field name.
 196    * </ul></dl>
 197    * </dl>
 198    *
 199    * @param config Configuration to apply
 200    * @throws XindiceException The configuration is missing required information,
 201    * or the Analyzer could not be instantiated.
 202    */
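/*
 * A configuration sketch for the optional 'default' element described above,
 * reusing the aliases from the class-level example; the element and attribute
 * names are assumed to follow the DEFAULT and PATTERN_ALIAS constants handled
 * in the method below.
 *
 *   <index name='fulltext' class='org.apache.xindice.core.indexer.LuceneIndexer'>
 *     <pattern pattern='meta@title' alias='title'/>
 *     <pattern pattern='description' alias='text'/>
 *     <default alias='text'/>
 *   </index>
 *
 * With this default in place, a query such as "xml tutorial" is parsed against
 * the 'text' field, so terms do not need an explicit field prefix.
 */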
 203  33 public void setConfig(Configuration config) throws XindiceException {
 204  33 this.config = config;
 205  33 try {
 206  33 name = config.getAttribute(NAME);
 207  33 String analyzer = config.getAttribute(ANALYZER);
 208   
 209  33 String anc = StringUtilities.isBlank(analyzer) ? DEFANALYZER : analyzer;
 210  33 Class c = Class.forName(anc);
 211  33 an = (Analyzer) c.newInstance();
 212   
 213  33 Configuration[] patterns = config.getChildren(PATTERN);
 214  33 if (patterns.length == 0) {
 215  0 throw new CannotCreateException("Configuration must have at least one pattern");
 216    }
 217   
 218  33 for (int i = 0; i < patterns.length; i++) {
 219  61 String name = patterns[i].getAttribute(PATTERN_STRING);
 220  61 String alias = patterns[i].getAttribute(PATTERN_ALIAS);
 221  61 this.patterns.put(new IndexPattern(collection.getSymbols(), name, null), alias);
 222    }
 223   
 224  33 Configuration[] defaults = config.getChildren(DEFAULT);
 225  33 if (defaults.length > 1) {
 226  0 throw new CannotCreateException("There may be only one default field");
 227  33 } else if (defaults.length == 1) {
 228  2 String alias = defaults[0].getAttribute(PATTERN_ALIAS);
 229  2 if (this.patterns.values().contains(alias)) {
 230  2 defaultField = alias;
 231    } else {
 232  0 throw new CannotCreateException("Alias '" + alias + "' is undefined in configuration");
 233    }
 234    }
 235   
 236  33 setFile(new SimpleFSDirectory(new File(collection.getCollectionRoot(), name), null));
 237    } catch (Exception e) {
 238  0 throw new XindiceException(e);
 239    }
 240    }
 241   
 242  0 public Configuration getConfig() {
 243  0 return config;
 244    }
 245   
 246  33 public boolean exists() throws DBException {
 247  33 try {
 248  33 return IndexReader.indexExists(idxFile);
 249    } catch (IOException e) {
 250  0 throw new IndexerException(FaultCodes.GEN_GENERAL_ERROR, "Error accessing index", e);
 251    }
 252    }
 253   
 254    /**
 255    * Creates necessary resources.
 256    *
 257    * @return true, if successful
 258    * @throws DBException A low-level IOException prevented the index
 259    * from creating its resources.
 260    * @throws DuplicateIndexException The parent collection already has a full text index
 261    */
 262  33 public synchronized boolean create() throws DBException {
 263  33 if (luceneIndexerFound()) {
 264  1 throw new DuplicateIndexException("Collection can only have one full text index.");
 265    }
 266  32 openWrite(true);
 267  32 return true;
 268    }
 269   
 270  33 private boolean luceneIndexerFound() throws DBException {
 271  33 String indexers[] = collection.getIndexManager().list();
 272  33 for (int i = 0; i < indexers.length; i++) {
 273  1 Indexer indexer = collection.getIndexer(indexers[i]);
 274  1 if (indexer instanceof LuceneIndexer) {
 275  1 return true;
 276    }
 277    }
 278   
 279  32 return false;
 280    }
 281   
 282  32 public boolean open() throws DBException {
 283  32 openWrite(false);
 284  32 return true;
 285    }
 286   
 287  584 public boolean isOpened() {
 288  584 return null != iw;
 289    }
 290   
 291  32 public synchronized boolean close() throws DBException {
 292  32 closeWrite();
 293  32 if (searcher != null) {
 294  30 searcher.close(true);
 295    }
 296  32 return true;
 297    }
 298   
 299  32 public boolean drop() throws DBException {
 300  32 try {
 301  32 if (IndexReader.indexExists(idxFile)) {
 302  32 close();
 303  32 return deepDelete(getFile().getFile());
 304    } else {
 305  0 return false;
 306    }
 307    } catch (IOException e) {
 308  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 309    "Failed to delete index " + name + ", collection " + collection.getCanonicalName(), e);
 310    }
 311    }
 312   
 313  130 public String getName() {
 314  130 return name;
 315    }
 316   
 317  33 public void setCollection(Collection collection) {
 318  33 this.collection = collection;
 319    }
 320   
 321  72 public Analyzer getAnalyzer() {
 322  72 return an;
 323    }
 324   
 325  64 private void openWrite(boolean create) throws DBException {
 326  64 if (log.isTraceEnabled()) {
 327  0 log.trace("Calling openWrite(" + create + ")");
 328    }
 329   
 330  64 try {
 331  64 if (iw == null) {
 332  32 iw = new IndexWriter(getFile(), getAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
 333    }
 334    } catch (IOException e) {
 335  0 if (create) {
 336  0 throw new DBException(FaultCodes.IDX_CANNOT_CREATE,
 337    "Failed to cleate index " + name + ", collection " + collection.getCanonicalName(), e);
 338    } else {
 339  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 340    "Failed to open index " + name + ", collection " + collection.getCanonicalName(), e);
 341    }
 342    }
 343    }
 344   
 345  584 private void assertOpen() {
 346  584 if (!isOpened()) {
 347  0 throw new IllegalStateException("Index has not been opened");
 348    }
 349    }
 350   
 351  32 private void closeWrite() throws DBException {
 352  32 if (null != iw) {
 353  32 try {
 354  32 iw.close();
 355  32 iw = null;
 356    } catch (IOException e) {
 357  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 358    "Failed to close writer for index " + name + ", collection " + collection.getCanonicalName(), e);
 359    }
 360    }
 361    }
 362   
 363  150 private boolean deepDelete(File f) throws IOException {
 364  150 if (f.isDirectory()) {
 365  32 File fl[] = f.listFiles();
 366  32 for (int i = 0; i < fl.length; i++) {
 367  118 if (!deepDelete(fl[i])) {
 368  0 return false;
 369    }
 370    }
 371    }
 372  150 return f.delete();
 373    }
 374   
 375  285 public void flush() throws DBException {
 376  285 try {
 377  285 assertOpen();
 378  285 if (iw != null) {
 379  285 iw.commit();
 380   
 381  285 int nDocs = iw.maxDoc();
 382    /* Fairly arbitrary rules for triggering index optimisation. Need to
 383    * play with these.
 384    */
 385  285 synchronized(lock) {
 386  285 if (docsAdded > nDocs / 10 || docsAdded > 50 || docsDeleted > 10) {
 387  137 if (log.isDebugEnabled()) {
 388  0 log.debug("Optimizing text index for " + collection.getCanonicalName() + "...");
 389    }
 390   
 391  137 iw.optimize();
 392  137 docsAdded = 0;
 393  137 docsDeleted = 0;
 394    }
 395    }
 396   
 397    }
 398    } catch (IOException e) {
 399  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 400    "Could not force unwritten data to disk for index " + name + ", collection " + collection.getCanonicalName(), e);
 401    }
 402    }
 403   
 404    /**
 405    * Creates a new instance of a handler to listen to indexer events. For
 406    * every document being added there will be a separate handler
 407    * that assembles all relevant values into a single Lucene document.
 408    *
 409    * @return new instance of IndexerEventHandler
 410    */
 411  1049 public IndexerEventHandler getIndexerEventHandler() {
 412  1049 return new BasicIndexerEventHandler() {
 413    Document doc;
 414   
 415  922 public void onDocumentAdded(Key key) throws DBException {
 416  922 if (doc != null) {
 417  172 assertOpen();
 418   
 419  172 try {
 420  172 iw.addDocument(doc);
 421  172 synchronized(lock) {
 422  172 docsAdded++;
 423    }
 424    } catch (IOException e) {
 425  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 426    "Failed to add document to the index " + name + ", collection " + collection.getCanonicalName(), e);
 427    }
 428    }
 429    }
 430   
 431  127 public void onDocumentDeleted(Key key) throws DBException {
 432  127 assertOpen();
 433   
 434  127 try {
 435  127 iw.deleteDocuments(new Term(KEYNAME, key.toString()));
 436  127 synchronized(lock) {
 437  127 docsDeleted++;
 438    }
 439    } catch (IOException e) {
 440  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 441    "Failed to delete document from the index " + name + ", collection " + collection.getCanonicalName(), e);
 442    }
 443    }
 444   
 445  370 public void onValueAdded(IndexPattern pattern, String value, Key key, int pos, int len, short elemID, short attrID) {
 446  370 if (doc == null) {
 447  172 doc = new Document();
 448  172 doc.add(new Field(KEYNAME, key.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
 449    }
 450   
 451  370 String field = (String) patterns.get(pattern);
 452  370 doc.add(new Field(field, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
 453    }
 454    };
 455    }
 456   
 457  6 public IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
 458    // this indexer only supports text queries
 459  6 if (query.getOperator() != IndexQuery.TQ) {
 460  0 return null;
 461    }
 462   
 463  6 String textQuery = query.getValue(0).toString();
 464  6 try {
 465  6 return queryMatches(new QueryParser(defaultField, getAnalyzer()).parse(textQuery));
 466    } catch (ParseException e) {
 467  0 throw new CompilationException("Failed to parse query '" + textQuery + "'", e);
 468    }
 469    }
 470   
 471    /**
 472    * Same as {@link Indexer#queryMatches(IndexQuery)}, but accepts a compiled Lucene query as
 473    * its parameter.
 474    *
 475    * @param query Compiled Lucene query.
 476    * @return The resulting matches
 477    * @throws DBException if an IOException prevented the indexer from executing the query.
 478    */
 479  40 public IndexMatch[] queryMatches(Query query) throws DBException {
 480  40 IndexMatch[] matches = null;
 481  40 Searcher searcher = getSearcher();
 482   
 483  40 try {
 484  40 TopDocs docs = searcher.is.search(query, searcher.ir.numDocs());
 485  40 matches = new IndexMatch[docs.scoreDocs.length];
 486   
 487  40 for (int i = 0; i < docs.scoreDocs.length; i++) {
 488  85 int doc = docs.scoreDocs[i].doc;
 489  85 Key key = new Key(searcher.ir.document(doc).getField(KEYNAME).stringValue());
 490  85 matches[i] = new IndexMatch(key, -1, -1);
 491    }
 492    } catch (IOException e) {
 493  0 throw new ProcessingException("Failed to process a query", e);
 494    } finally {
 495  40 searcher.free();
 496    }
 497   
 498  40 return matches;
 499    }
 500   
 501    /**
 502    * Returns a Searcher that uses the current version of the index.
 503    * If the index has been modified since the last time a searcher was requested,
 504    * this method will create a new Searcher instance; otherwise it will
 505    * return the Searcher instance it created previously.
 506    *
 507    * @return current Searcher
 508    * @throws DBException If index could not be accessed
 509    */
 510  40 private synchronized Searcher getSearcher() throws DBException {
 511   
 512  40 if (searcher != null && !searcher.isCurrent()) {
 513  1 searcher.close(false);
 514  1 searcher = null;
 515    }
 516   
 517  40 if (searcher == null) {
 518  31 searcher = new Searcher();
 519    } else {
 520  9 searcher.incRef();
 521    }
 522   
 523  40 return searcher;
 524    }
 525   
 526    private class Searcher {
 527    private IndexReader ir;
 528    private IndexSearcher is;
 529   
 530    // number of searches in progress using that searcher
 531    private int ref = 1;
 532   
 533  31 public Searcher() throws DBException {
 534  31 try {
 535  31 ir = IndexReader.open(getFile(), true);
 536  31 is = new IndexSearcher(ir);
 537    } catch (IOException e) {
 538  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 539    "Failed to open access " + name + ", collection " + collection.getCanonicalName(), e);
 540    }
 541    }
 542   
 543  10 public boolean isCurrent() throws DBException {
 544  10 try {
 545  10 return ir.isCurrent();
 546    } catch (IOException e) {
 547  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 548    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 549    }
 550    }
 551   
 552  9 public void incRef() {
 553  9 ref++;
 554    }
 555   
 556    /**
 557    * This method must be called after executing a text query to clean up
 558    * resources that are no longer in use. It decrements the number of
 559    * searches referencing this searcher and then attempts to close it
 560    * unless it is the most recently opened searcher. If no searcher was
 561    * opened after this one, the searcher will be kept open
 562    * for future use, even if it is not in use at the moment.
 563    *
 564    * @throws DBException if there was IOException
 565    */
 566  40 public void free() throws DBException {
 567  40 synchronized (LuceneIndexer.this) {
 568  40 ref--;
 569   
 570  40 if (searcher != this) {
 571  0 close(false);
 572    }
 573    }
 574    }
 575   
 576    /**
 577    * Closes the searcher if it is not used in any search.
 578    *
 579    * @param force true if the searcher has to be closed even if it is in use
 580    * @throws DBException if there was IOException
 581    */
 582  31 public void close(boolean force) throws DBException {
 583  31 try {
 584  31 if (ref == 0 || force) {
 585  31 is.close();
 586  31 ir.close();
 587    }
 588    } catch (IOException e) {
 589  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 590    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 591    }
 592    }
 593   
 594    /**
 595    * Internal search method.
 596    * @param query Search query
 597    * @return TopDocs for the search
 598    * @throws DBException Index could not be accessed
 599    * @deprecated Deprecated following Lucene changes
 600    */
 601  0 public TopDocs search(Query query) throws DBException {
 602  0 try {
 603  0 return is.search(query, ir.numDocs());
 604    } catch (IOException e) {
 605  0 throw new DBException(FaultCodes.IDX_CORRUPTED,
 606    "Failed to access index " + name + ", collection " + collection.getCanonicalName(), e);
 607    }
 608    }
 609    }
 610    }