Clover coverage report -
Coverage timestamp: Sun Nov 1 2009 23:08:24 UTC
file stats: LOC: 298   Methods: 27
NCLOC: 183   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
NodeReader.java 63.6% 53% 40.7% 52.3%
coverage coverage
 1    /*
 2    * Licensed to the Apache Software Foundation (ASF) under one or more
 3    * contributor license agreements. See the NOTICE file distributed with
 4    * this work for additional information regarding copyright ownership.
 5    * The ASF licenses this file to You under the Apache License, Version 2.0
 6    * (the "License"); you may not use this file except in compliance with
 7    * the License. You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    *
 17    * $Id: NodeReader.java 821662 2009-10-05 02:09:28Z natalia $
 18    */
 19   
 20    package org.apache.xindice.core.query.ftsearch;
 21   
 22    import org.apache.lucene.index.IndexReader;
 23    import org.apache.lucene.index.TermFreqVector;
 24    import org.apache.lucene.index.TermEnum;
 25    import org.apache.lucene.index.Term;
 26    import org.apache.lucene.index.TermDocs;
 27    import org.apache.lucene.index.TermPositions;
 28    import org.apache.lucene.index.TermVectorMapper;
 29    import org.apache.lucene.document.Document;
 30    import org.apache.lucene.document.FieldSelector;
 31    import org.apache.lucene.analysis.Analyzer;
 32    import org.apache.lucene.analysis.TokenStream;
 33    import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 34    import org.apache.lucene.search.DefaultSimilarity;
 35    import org.apache.xindice.xml.dom.NodeImpl;
 36    import org.w3c.dom.NodeList;
 37    import org.w3c.dom.Node;
 38   
 39    import java.io.IOException;
 40    import java.io.StringReader;
 41    import java.util.Collection;
 42    import java.util.Arrays;
 43    import java.util.ArrayList;
 44    import java.util.SortedMap;
 45    import java.util.TreeMap;
 46    import java.util.HashSet;
 47    import java.util.List;
 48   
 49    /**
 50    * Implementation of IndexReader that works with set of DOM nodes in memory.
 51    * Set of nodes is constant - no modifications of it are allowed.
 52    *
 53    * @version $Revision: 821662 $, $Date: 2009-10-05 02:09:28 +0000 (Mon, 05 Oct 2009) $
 54    */
 55    public class NodeReader extends IndexReader {
 56    // list of DOM nodes
 57    private final ArrayList[] nodes;
 58   
 59    private byte[] norms;
 60   
 61    // maps term to a list of documents where it appears
 62    private final SortedMap termMap;
 63   
 64  54 protected NodeReader(NodeList list, Analyzer analyzer) {
 65  54 super();
 66   
 67  54 nodes = new ArrayList[list.getLength()];
 68   
 69  54 for (int i = 0; i < nodes.length; i++) {
 70  74 nodes[i] = new ArrayList();
 71  74 Node node = list.item(i);
 72   
 73  74 String text;
 74  74 if (node instanceof NodeImpl) {
 75    // DOM Level 3 method
 76  74 text = ((NodeImpl) node).getTextContent();
 77    } else {
 78  0 text = getTextContent(node);
 79    }
 80   
 81  74 TokenStream stream = analyzer.tokenStream("", new StringReader(text));
 82  74 TermAttribute termAttr = (TermAttribute) stream.addAttribute(TermAttribute.class);
 83  74 try {
 84  74 stream.reset();
 85  74 while (stream.incrementToken()) {
 86  1256 nodes[i].add(termAttr.term());
 87    }
 88  74 stream.end();
 89  74 stream.close();
 90    } catch (IOException e) {
 91    // won't happen
 92    }
 93    }
 94   
 95    // init norms
 96  54 norms = new byte[nodes.length];
 97  54 Arrays.fill(norms, DefaultSimilarity.encodeNorm(1.0f));
 98   
 99    // build term enumeration
 100  54 termMap = buildTermMap();
 101    }
 102   
 103  0 public TermFreqVector[] getTermFreqVectors(int docNumber) {
 104  0 throw new UnsupportedOperationException();
 105    }
 106   
 107  0 public TermFreqVector getTermFreqVector(int docNumber, String field) {
 108  0 throw new UnsupportedOperationException();
 109    }
 110   
 111  0 public void getTermFreqVector(int i, String string, TermVectorMapper termVectorMapper) throws IOException {
 112  0 throw new UnsupportedOperationException();
 113    }
 114   
 115  0 public void getTermFreqVector(int i, TermVectorMapper termVectorMapper) throws IOException {
 116  0 throw new UnsupportedOperationException();
 117    }
 118   
 119  54 public int numDocs() {
 120  54 return nodes.length;
 121    }
 122   
 123  116 public int maxDoc() {
 124  116 return nodes.length;
 125    }
 126   
 127    /**
 128    * Method is not supported.
 129    */
 130  0 public Document document(int n, FieldSelector fieldSelector) {
 131  0 return null;
 132    }
 133   
 134    /**
 135    * Deletion is not supported.
 136    */
 137  0 public boolean isDeleted(int n) {
 138  0 return false;
 139    }
 140   
 141    /**
 142    * Deletion is not supported.
 143    */
 144  0 public boolean hasDeletions() {
 145  0 return false;
 146    }
 147   
 148  53 public byte[] norms(String field) throws IOException {
 149  53 return field.length() == 0 ? norms : null;
 150    }
 151   
 152  0 public void norms(String field, byte[] bytes, int offset) {
 153  0 System.arraycopy(norms, 0, bytes, offset, maxDoc());
 154    }
 155   
 156  0 protected void doSetNorm(int doc, String field, byte value) {
 157  0 if (field.length() > 0) {
 158  0 return;
 159    }
 160   
 161  0 norms[doc] = value;
 162    }
 163   
 164  0 public TermEnum terms() {
 165  0 return new NodeTermEnum(termMap);
 166    }
 167   
 168  11 public TermEnum terms(Term t) {
 169  11 return new NodeTermEnum(termMap, t);
 170    }
 171   
 172    /**
 173    * Builds the map of all the terms in all the nodes to the list of
 174    * node numbers where those terms appear.
 175    * @return Map with keys sorted in ascending order
 176    */
 177  54 private SortedMap buildTermMap() {
 178  54 SortedMap map = new TreeMap();
 179   
 180  54 for (int i = 0; i < nodes.length; i++) {
 181  74 for (int j = 0; j < nodes[i].size(); j++) {
 182  1256 String term = (String) nodes[i].get(j);
 183   
 184  1256 List docs;
 185  1256 if (map.containsKey(term)) {
 186  522 docs = (List) map.get(term);
 187    } else {
 188  734 docs = new ArrayList();
 189    }
 190   
 191  1256 docs.add(new Integer(i));
 192  1256 map.put(term, docs);
 193    }
 194    }
 195   
 196  54 return map;
 197    }
 198   
 199  59 public int docFreq(Term t) {
 200  59 List docs = (List) termMap.get(t.text());
 201   
 202    // no such term
 203  59 if (docs == null) {
 204  22 return 0;
 205    }
 206   
 207  37 HashSet set = new HashSet();
 208  37 set.addAll(docs);
 209  37 return set.size();
 210    }
 211   
 212  55 public TermDocs termDocs() {
 213  55 return new NodeTermDocs(this);
 214    }
 215   
 216  4 public TermPositions termPositions() {
 217  4 return new NodeTermPositions(this);
 218    }
 219   
 220    /**
 221    * Deletion is not supported.
 222    */
 223  0 protected void doDelete(int docNum) {
 224  0 throw new UnsupportedOperationException();
 225    }
 226   
 227    /**
 228    * Deletion is not supported.
 229    */
 230  0 protected void doUndeleteAll() {
 231  0 throw new UnsupportedOperationException();
 232    }
 233   
 234    /**
 235    * Not applicable
 236    */
 237  0 protected void doCommit() {
 238    }
 239   
 240    /**
 241    * Not applicable
 242    */
 243  0 protected void doClose() {
 244    }
 245   
 246    /**
 247    * Field names are not supported.
 248    */
 249  0 public Collection getFieldNames(FieldOption fldOption) {
 250  0 throw new UnsupportedOperationException();
 251    }
 252   
 253  59 ArrayList[] getNodes() {
 254  59 return nodes;
 255    }
 256   
 257  59 SortedMap getTermMap() {
 258  59 return termMap;
 259    }
 260   
 261    /**
 262    * Get text content of a DOM node. This is the same as DOM Level 3 method
 263    * getTextContent().
 264    * @param node DOM node
 265    * @return The text content of this node and its descendants.
 266    */
 267  0 private String getTextContent(Node node) {
 268  0 String text = null;
 269  0 switch (node.getNodeType()) {
 270  0 case Node.ATTRIBUTE_NODE:
 271  0 case Node.CDATA_SECTION_NODE:
 272  0 case Node.COMMENT_NODE:
 273  0 case Node.PROCESSING_INSTRUCTION_NODE:
 274  0 case Node.TEXT_NODE:
 275  0 text = node.getNodeValue();
 276  0 break;
 277  0 case Node.ELEMENT_NODE:
 278  0 case Node.DOCUMENT_FRAGMENT_NODE:
 279  0 case Node.ENTITY_NODE:
 280  0 case Node.ENTITY_REFERENCE_NODE:
 281  0 StringBuffer val = new StringBuffer();
 282   
 283  0 NodeList children = node.getChildNodes();
 284  0 if (children == null || children.getLength() == 0) {
 285  0 text = "";
 286  0 break;
 287    }
 288   
 289  0 for (int i = 0; i < children.getLength(); i++) {
 290  0 val.append(getTextContent(children.item(i)));
 291    }
 292  0 text = val.toString();
 293  0 break;
 294    }
 295   
 296  0 return text;
 297    }
 298    }