Clover coverage report -
Coverage timestamp: Sun Nov 1 2009 23:08:24 UTC
file stats: LOC: 445   Methods: 17
NCLOC: 319   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
ValueIndexer.java 61.3% 83.2% 100% 79.2%
coverage coverage
 1    /*
 2    * Licensed to the Apache Software Foundation (ASF) under one or more
 3    * contributor license agreements. See the NOTICE file distributed with
 4    * this work for additional information regarding copyright ownership.
 5    * The ASF licenses this file to You under the Apache License, Version 2.0
 6    * (the "License"); you may not use this file except in compliance with
 7    * the License. You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    *
 17    * $Id: ValueIndexer.java 712571 2008-11-09 22:00:06Z natalia $
 18    */
 19   
 20    package org.apache.xindice.core.indexer;
 21   
 22    import org.apache.commons.logging.Log;
 23    import org.apache.commons.logging.LogFactory;
 24    import org.apache.xindice.core.Collection;
 25    import org.apache.xindice.core.DBException;
 26    import org.apache.xindice.core.data.Key;
 27    import org.apache.xindice.core.data.Value;
 28    import org.apache.xindice.core.filer.BTree;
 29    import org.apache.xindice.core.filer.BTreeCallback;
 30    import org.apache.xindice.core.filer.BTreeCorruptException;
 31    import org.apache.xindice.core.filer.Paged;
 32    import org.apache.xindice.core.query.QueryEngine;
 33    import org.apache.xindice.util.Configuration;
 34    import org.apache.xindice.xml.SymbolTable;
 35   
 36    import java.io.File;
 37    import java.io.IOException;
 38    import java.util.ArrayList;
 39    import java.util.List;
 40   
 41    /**
 42    * ValueIndexer is a basic implementation of the Indexer interface.
 43    * It is used for maintaining element and element@attribute value
 44    * indexes.
 45    *
 46    * @version $Revision: 712571 $, $Date: 2008-11-09 22:00:06 +0000 (Sun, 09 Nov 2008) $
 47    */
 48    public final class ValueIndexer extends BTree implements Indexer {
 49   
 50    private static final Log log = LogFactory.getLog(ValueIndexer.class);
 51   
 52    private static final IndexMatch[] EmptyMatches = new IndexMatch[0];
 53    private static final Value EmptyValue = new Value(new byte[0]);
 54   
 55    private static final long MATCH_INFO = -1000;
 56   
 57    private static final String NAME = "name";
 58    private static final String PATTERN = "pattern";
 59    private static final String TYPE = "type";
 60   
 61    private static final int STRING = 0;
 62    private static final int TRIMMED = 1;
 63    private static final int INTEGER = 2;
 64    private static final int FLOAT = 3;
 65    private static final int BYTE = 4;
 66    private static final int CHAR = 5;
 67    private static final int BOOLEAN = 6;
 68   
 69    private static final int[] sizes = {-1, -1, 8, 8, 1, 2, 1};
 70   
 71    private static final String STRING_VAL = "string";
 72    private static final String TRIMMED_VAL = "trimmed";
 73    private static final String SHORT_VAL = "short";
 74    private static final String INT_VAL = "int";
 75    private static final String LONG_VAL = "long";
 76    private static final String FLOAT_VAL = "float";
 77    private static final String DOUBLE_VAL = "double";
 78    private static final String BYTE_VAL = "byte";
 79    private static final String CHAR_VAL = "char";
 80    private static final String BOOLEAN_VAL = "boolean";
 81   
 82    private Collection collection;
 83    private SymbolTable symbols;
 84   
 85    private String name;
 86    private IndexPattern pattern;
 87    private int type;
 88    private int typeSize = 32;
 89    private boolean wildcard;
 90    private IndexerEventHandler handler;
 91   
 92   
 93  64 public ValueIndexer() {
 94  64 super();
 95    }
 96   
 97    /**
 98    * Override createFileHeader - set page size to 1024
 99    */
 100  64 protected Paged.FileHeader createFileHeader() {
 101  64 Paged.FileHeader header = super.createFileHeader();
 102  64 header.setPageSize(1024);
 103  64 return header;
 104    }
 105   
 106  64 public void setCollection(Collection collection) {
 107  64 this.collection = collection;
 108  64 symbols = collection.getSymbols();
 109    }
 110   
 111  64 private void setLocation(String location) {
 112  64 setFile(new File(collection.getCollectionRoot(), location + ".idx"));
 113    }
 114   
 115  64 public void setConfig(Configuration config) {
 116  64 super.setConfig(config);
 117  64 try {
 118  64 name = config.getAttribute(NAME);
 119   
 120  64 String pattern = config.getAttribute(PATTERN);
 121  64 wildcard = pattern.indexOf('*') != -1;
 122   
 123    // Determine the Index Type
 124  64 String tv = config.getAttribute(TYPE, STRING_VAL).toLowerCase();
 125  64 if (tv.equals(STRING_VAL)) {
 126  47 type = STRING;
 127  17 } else if (tv.equals(TRIMMED_VAL)) {
 128  0 type = TRIMMED;
 129  17 } else if (tv.equals(SHORT_VAL)) {
 130  0 type = INTEGER;
 131  17 } else if (tv.equals(INT_VAL)) {
 132  0 type = INTEGER;
 133  17 } else if (tv.equals(LONG_VAL)) {
 134  13 type = INTEGER;
 135  4 } else if (tv.equals(FLOAT_VAL)) {
 136  1 type = FLOAT;
 137  3 } else if (tv.equals(DOUBLE_VAL)) {
 138  0 type = FLOAT;
 139  3 } else if (tv.equals(BYTE_VAL)) {
 140  1 type = BYTE;
 141  2 } else if (tv.equals(CHAR_VAL)) {
 142  1 type = CHAR;
 143  1 } else if (tv.equals(BOOLEAN_VAL)) {
 144  1 type = BOOLEAN;
 145    } else {
 146  0 if (pattern.indexOf('@') != -1) {
 147  0 type = STRING;
 148    } else {
 149  0 type = TRIMMED;
 150    }
 151    }
 152   
 153  64 this.pattern = new IndexPattern(symbols, pattern, null);
 154  64 typeSize = sizes[type];
 155   
 156  64 setLocation(name);
 157  64 setupHandler();
 158    } catch (Exception e) {
 159  0 if (log.isWarnEnabled()) {
 160  0 log.warn("ignored exception", e);
 161    }
 162    }
 163    }
 164   
 165  64 private void setupHandler() {
 166  64 handler = new BasicIndexerEventHandler() {
 167  917 public void onValueAdded(IndexPattern pattern, String value, Key key, int pos, int len, short elemID, short attrID) throws DBException {
 168  917 Value v = getTypedValue(value);
 169  917 if (type != STRING && type != TRIMMED && v.getLength() == 0) {
 170  0 return;
 171    }
 172   
 173  917 try {
 174  917 BTreeRootInfo root = findBTreeRoot(v);
 175  917 if (root == null) {
 176  885 root = createBTreeRoot(v);
 177    }
 178   
 179  917 Value cv = getCombinedValue(key, pos, len, elemID, attrID);
 180  917 addValue(root, cv, MATCH_INFO);
 181    } catch (DBException e) {
 182  0 throw e;
 183    } catch (IOException e) {
 184  0 throw new BTreeCorruptException("Corruption detected on add", e);
 185    } catch (Exception e) {
 186  0 if (log.isWarnEnabled()) {
 187  0 log.warn("ignored exception", e);
 188    }
 189    }
 190    }
 191   
 192  1 public void onValueDeleted(IndexPattern pattern, String value, Key key, int pos, int len, short elemID, short attrID) throws DBException {
 193  1 Value v = getTypedValue(value);
 194  1 if (type != STRING && type != TRIMMED && v.getLength() == 0) {
 195  0 return;
 196    }
 197   
 198  1 try {
 199  1 BTreeRootInfo root = findBTreeRoot(v);
 200  1 Value cv = getCombinedValue(key, pos, len, elemID, attrID);
 201  1 removeValue(root, cv);
 202    } catch (DBException e) {
 203  0 throw e;
 204    } catch (Exception e) {
 205  0 if (log.isWarnEnabled()) {
 206  0 log.warn("ignored exception", e);
 207    }
 208    }
 209    }
 210    };
 211    }
 212   
 213  261 public String getName() {
 214  261 return name;
 215    }
 216   
 217  225 public String getIndexStyle() {
 218  225 return STYLE_NODEVALUE;
 219    }
 220   
 221  27783 public IndexPattern[] getPatterns() {
 222  27783 return new IndexPattern[] { pattern };
 223    }
 224   
 225    /**
 226    * Creates new Value object that depends on string parameter and a type of indexer.
 227    * The idea here is that any value of any type should be transformed to a byte array
 228    * in such a way that two Values of the same type are comparable. Value objects are
 229    * compared by comparing their data arrays byte-by-byte, starting from byte with
 230    * index 0. Also, data arrays will behave as array of <b>unsigned</b> bytes with
 231    * values ranging from 0 to 255.
 232    *
 233    * @param value string value to convert
 234    * @return new Value object that represents specific value of this indexer type
 235    */
 236  950 public Value getTypedValue(String value) {
 237  950 if (type != STRING && type != TRIMMED) {
 238  109 value = value.trim();
 239  109 if (value.length() == 0) {
 240  0 return EmptyValue;
 241    }
 242   
 243  109 byte[] b = new byte[typeSize];
 244  109 try {
 245  109 switch (type) {
 246  90 case INTEGER:
 247    /*
 248    Generally, two integers can be compared byte-by-byte, returning correct results,
 249    except negative numbers which will be always bigger than positive numbers.
 250    To solve this, change the value to be unsigned. Number range changes from
 251    Long.MIN_VALUE / Long.MAX_VALUE to 0 / Long.MAX_VALUE - Long.MIN_VALUE.
 252    This is done by flipping the first bit of the byte with index 0.
 253    */
 254  90 long l = Long.parseLong(value);
 255  90 b[0] = (byte) ((l >>> 56) & 0xFF);
 256  90 b[1] = (byte) ((l >>> 48) & 0xFF);
 257  90 b[2] = (byte) ((l >>> 40) & 0xFF);
 258  90 b[3] = (byte) ((l >>> 32) & 0xFF);
 259  90 b[4] = (byte) ((l >>> 24) & 0xFF);
 260  90 b[5] = (byte) ((l >>> 16) & 0xFF);
 261  90 b[6] = (byte) ((l >>> 8) & 0xFF);
 262  90 b[7] = (byte) (l & 0xFF);
 263  90 b[0] = (byte) (b[0] ^ 0x80);
 264  90 break;
 265   
 266  10 case FLOAT:
 267    /*
 268    Float/Double number are stored according to IEEE standard 754. In short,
 269    float numbers have the folloing format: 1 bit to indicate the sign of
 270    the number, 8 bits for exponent, 23 bits for mantissa, and double
 271    numbers have the following format: 1 bit to indicate the sign of
 272    the number, 11 bits for exponent, 52 bits for mantissa. Both float and
 273    double are normalized, so they can be compared byte-by-byte, except
 274    that comparing two negative numbers or two number with different signs
 275    will return incorrect results.
 276    This is solved by changing number sign (is is stored in the first bit)
 277    and flipping all the bits for negative numbers.
 278    */
 279  10 double d = Double.parseDouble(value);
 280  10 long bits = Double.doubleToLongBits(d);
 281   
 282  10 b[0] = (byte) ((bits >>> 56) & 0xFF);
 283  10 if ((b[0] & 0xFF) != 0) {
 284    // negative numbers
 285  9 b[0] ^= 0xFF;
 286  9 b[1] = (byte) ((bits >>> 48) & 0xFF ^ 0xFF);
 287  9 b[2] = (byte) ((bits >>> 40) & 0xFF ^ 0xFF);
 288  9 b[3] = (byte) ((bits >>> 32) & 0xFF ^ 0xFF);
 289  9 b[4] = (byte) ((bits >>> 24) & 0xFF ^ 0xFF);
 290  9 b[5] = (byte) ((bits >>> 16) & 0xFF ^ 0xFF);
 291  9 b[6] = (byte) ((bits >>> 8) & 0xFF ^ 0xFF);
 292  9 b[7] = (byte) (bits & 0xFF ^ 0xFF);
 293    } else {
 294  1 b[0] ^= 0x80;
 295  1 b[1] = (byte) ((bits >>> 48) & 0xFF);
 296  1 b[2] = (byte) ((bits >>> 40) & 0xFF);
 297  1 b[3] = (byte) ((bits >>> 32) & 0xFF);
 298  1 b[4] = (byte) ((bits >>> 24) & 0xFF);
 299  1 b[5] = (byte) ((bits >>> 16) & 0xFF);
 300  1 b[6] = (byte) ((bits >>> 8) & 0xFF);
 301  1 b[7] = (byte) (bits & 0xFF);
 302    }
 303   
 304  10 break;
 305   
 306  3 case BYTE:
 307  3 b[0] = Byte.parseByte(value);
 308  3 b[0] = (byte) (b[0] ^ 0x80);
 309  3 break;
 310   
 311  3 case CHAR:
 312  3 char c = value.charAt(0);
 313  3 b[0] = (byte) ((c >>> 8) & 0xFF);
 314  3 b[1] = (byte) ( c & 0xFF);
 315  3 break;
 316   
 317  3 case BOOLEAN:
 318  3 if ("[true][yes][1][y][on]".indexOf("[" + value.toLowerCase() + "]") != -1) {
 319  1 b[0] = 1;
 320  2 } else if ("[false][no][0][n][off]".indexOf("[" + value.toLowerCase() + "]") != -1) {
 321  2 b[0] = 0;
 322    } else {
 323  0 return EmptyValue;
 324    }
 325  3 break;
 326   
 327  0 default:
 328  0 if (log.isWarnEnabled()) {
 329  0 log.warn("invalid type : " + type);
 330    }
 331    }
 332   
 333  109 return new Value(b);
 334   
 335    } catch (Exception e) {
 336  0 return EmptyValue;
 337    }
 338    }
 339   
 340  841 if (type == TRIMMED) {
 341  0 value = QueryEngine.normalizeString(value);
 342    }
 343   
 344  841 return new Value(value);
 345    }
 346   
 347  918 private Value getCombinedValue(Key key, int pos, int len, short elemID, short attrID) {
 348  918 int l = key.getLength();
 349  918 byte[] b = new byte[l + 13];
 350   
 351    // Write the key
 352  918 key.copyTo(b, 0, l);
 353  918 b[l] = 0;
 354   
 355    // Write the pos
 356  918 b[l + 1] = (byte) ((pos >>> 24) & 0xFF);
 357  918 b[l + 2] = (byte) ((pos >>> 16) & 0xFF);
 358  918 b[l + 3] = (byte) ((pos >>> 8) & 0xFF);
 359  918 b[l + 4] = (byte) ( pos & 0xFF);
 360   
 361    // Write the len
 362  918 b[l + 5] = (byte) ((len >>> 24) & 0xFF);
 363  918 b[l + 6] = (byte) ((len >>> 16) & 0xFF);
 364  918 b[l + 7] = (byte) ((len >>> 8) & 0xFF);
 365  918 b[l + 8] = (byte) ( len & 0xFF);
 366   
 367    // Write the elemID
 368  918 b[l + 9] = (byte) ((elemID >>> 8) & 0xFF);
 369  918 b[l + 10] = (byte) ( elemID & 0xFF);
 370   
 371    // Write the attrID
 372  918 b[l + 11] = (byte) ((attrID >>> 8) & 0xFF);
 373  918 b[l + 12] = (byte) ( attrID & 0xFF);
 374   
 375  918 return new Value(b);
 376    }
 377   
 378  171 private IndexMatch getIndexMatch(Value v) {
 379  171 int l = v.getLength() - 13;
 380   
 381  171 Key key = v.keyAt(0, l);
 382  171 int pos = v.intAt(l + 1);
 383  171 int len = v.intAt(l + 5);
 384  171 short elemID = v.shortAt(l + 9);
 385  171 short attrID = v.shortAt(l + 11);
 386   
 387  171 return new IndexMatch(key, pos, len, elemID, attrID);
 388    }
 389   
 390  93 public IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
 391    // Pre-process the value-set for typing and trimming
 392  93 if (type != STRING) {
 393  25 Value[] vals = query.getValues();
 394  25 for (int i = 0; i < vals.length; i++) {
 395  32 vals[i] = getTypedValue(vals[i].toString());
 396    }
 397    }
 398   
 399    // Now issue the query
 400  93 final List results = new ArrayList();
 401   
 402  93 try {
 403  93 query(query, new BTreeCallback() {
 404  306 public boolean indexInfo(Value value, long pos) {
 405  306 try {
 406  306 if (pos == MATCH_INFO) {
 407  171 IndexMatch match = getIndexMatch(value);
 408  171 if (wildcard) {
 409  38 IndexPattern matchElement = new IndexPattern(symbols, match.getElement(), match.getAttribute());
 410  38 IndexPattern queryElement = new IndexPattern(symbols, query.getPattern().getElementID(),
 411    query.getPattern().getAttributeID());
 412  38 if (matchElement.getMatchLevel(queryElement) > 0) {
 413  19 results.add(match);
 414    }
 415    } else {
 416  133 results.add(match);
 417    }
 418    } else {
 419  135 BTreeRootInfo root = new BTreeRootInfo(value, pos);
 420  135 query(root, null, this);
 421    }
 422  306 return true;
 423    } catch (Exception e) {
 424  0 if (log.isWarnEnabled()) {
 425  0 log.warn("ignored exception", e);
 426    }
 427    }
 428  0 return true;
 429    }
 430    });
 431    } catch (IOException e) {
 432  0 throw new BTreeCorruptException("Corruption detected on query", e);
 433    } catch (Exception e) {
 434  0 if (log.isWarnEnabled()) {
 435  0 log.warn("ignored exception", e);
 436    }
 437    }
 438   
 439  93 return (IndexMatch[]) results.toArray(EmptyMatches);
 440    }
 441   
 442  4663 public IndexerEventHandler getIndexerEventHandler() {
 443  4663 return handler;
 444    }
 445    }