mirror of https://github.com/apache/lucene.git
LUCENE-1673: Move TrieRange to core (part 2: removing from contrib/queries)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@786474 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 7b34ab8f30
commit 0b5cbca110
@@ -338,7 +338,7 @@
 <group title="contrib: Lucli" packages="lucli*"/>
 <group title="contrib: Memory" packages="org.apache.lucene.index.memory*"/>
 <group title="contrib: Miscellaneous " packages="org.apache.lucene.misc*:org.apache.lucene.queryParser.analyzing*:org.apache.lucene.queryParser.precedence*"/>
-<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.trie*"/>
+<group title="contrib: Queries" packages="org.apache.lucene.search.similar*"/>
 <group title="contrib: RegEx" packages="org.apache.lucene.search.regex*:org.apache.regexp*"/>
 <group title="contrib: Snowball" packages="org.apache.lucene.analysis.snowball*:net.sf.snowball*"/>
 <group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
@@ -38,33 +38,28 @@ Bug fixes

 New features

-1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
-   RangeQuery at the expense of added space (additional indexed
-   tokens) consumed in the index. (Uwe Schindler via Mike McCandless)
-
-2. LUCENE-1531: Added support for BoostingTermQuery to XML query parser. (Karl Wettin)
+1. LUCENE-1531: Added support for BoostingTermQuery to XML query parser. (Karl Wettin)

-3. LUCENE-1435: Added contrib/collation, a CollationKeyFilter
+2. LUCENE-1435: Added contrib/collation, a CollationKeyFilter
    allowing you to convert tokens into CollationKeys encoded using
    IndexableBinaryStringTools. This allows for faster RangeQuery when
    a field needs to use a custom Collator. (Steven Rowe via Mike
    McCandless)

-4. LUCENE-1591: EnWikiDocMaker, LineDocMaker, WriteLineDoc can now
+3. LUCENE-1591: EnWikiDocMaker, LineDocMaker, WriteLineDoc can now
    read/write bz2 using Apache commons compress library. This means
    you can download the .bz2 export from http://wikipedia.org and
    immediately index it. (Shai Erera via Mike McCandless)

-5. LUCENE-1629: Add SmartChineseAnalyzer to contrib/analyzers. It
+4. LUCENE-1629: Add SmartChineseAnalyzer to contrib/analyzers. It
    improves on CJKAnalyzer and ChineseAnalyzer by handling Chinese
    sentences properly. SmartChineseAnalyzer uses a Hidden Markov
    Model to tokenize Chinese words in a more intelligent way.
    (Xiaoping Gao via Mike McCandless)

-6. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll)
+5. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll)

-7. LUCENE-1578: Support for loading unoptimized readers to the
+6. LUCENE-1578: Support for loading unoptimized readers to the
    constructor of InstantiatedIndex. (Karl Wettin)

 Optimizations

@@ -1,13 +1,5 @@
 This module contains a number of filter and query objects that add to core lucene.

-==== TrieRangeFilter/TrieRangeQuery - This package provides fast numeric range queries/filters on long,
-double or Date fields based on trie structures. Numerical values are stored in a special string-encoded
-format with variable precision (all numerical values like doubles, longs, and timestamps are converted
-to lexicographically sortable string representations). A range is then divided recursively into multiple
-intervals for searching: the center of the range is searched with only the lowest possible precision
-in the trie, while the boundaries are matched more exactly. This reduces the number of terms and thus
-dramatically improves the performance of range queries, at the cost of a larger index.
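
For illustration, a minimal sketch of the usage pattern this package provided (field name
"price", precisionStep 8, and an already-open IndexWriter named writer are all hypothetical):

  // index a numeric value, reusing the stream and field across documents
  LongTrieTokenStream stream = new LongTrieTokenStream(8);
  Field field = new Field("price", stream);
  field.setOmitNorms(true);
  field.setOmitTermFreqAndPositions(true);
  Document doc = new Document();
  doc.add(field);
  stream.setValue(1250L);
  writer.addDocument(doc);

  // search the range 1000..2000, both bounds inclusive
  Query q = new LongTrieRangeQuery("price", 8, new Long(1000L), new Long(2000L), true, true);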
-
 ==== The "MoreLikeThis" class from the "similarity" module has been copied into here.
 If people are generally happy with this move then the similarity module can be deleted, or at least a
 "Moved to queries module..." note left in its place.

@@ -26,5 +18,5 @@ a sequence. An example might be a collection of primary keys from a database query,
 a choice of "category" labels picked by the end user.


-Mark Harwood & Uwe Schindler
-05/12/2008
+Mark Harwood
+25/02/2006

@@ -1,116 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;

abstract class AbstractTrieRangeQuery extends MultiTermQuery {

  AbstractTrieRangeQuery(final String field, final int precisionStep,
    Number min, Number max, final boolean minInclusive, final boolean maxInclusive
  ) {
    this.field = field.intern();
    this.precisionStep = precisionStep;
    this.min = min;
    this.max = max;
    this.minInclusive = minInclusive;
    this.maxInclusive = maxInclusive;
    setConstantScoreRewrite(true);
  }

  abstract void passRanges(TrieRangeTermEnum enumerator);

  //@Override
  protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
    TrieRangeTermEnum enumerator = new TrieRangeTermEnum(this, reader);
    passRanges(enumerator);
    enumerator.init();
    return enumerator;
  }

  /** Returns the field name for this query */
  public String getField() { return field; }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesMin() { return minInclusive; }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesMax() { return maxInclusive; }

  //@Override
  public String toString(final String field) {
    final StringBuffer sb=new StringBuffer();
    if (!this.field.equals(field)) sb.append(this.field).append(':');
    return sb.append(minInclusive ? '[' : '{')
      .append((min==null) ? "*" : min.toString())
      .append(" TO ")
      .append((max==null) ? "*" : max.toString())
      .append(maxInclusive ? ']' : '}')
      .append(ToStringUtils.boost(getBoost()))
      .toString();
  }

  //@Override
  public final boolean equals(final Object o) {
    if (o==this) return true;
    if (o==null) return false;
    if (this.getClass().equals(o.getClass())) {
      AbstractTrieRangeQuery q=(AbstractTrieRangeQuery)o;
      return (
        field==q.field &&
        (q.min == null ? min == null : q.min.equals(min)) &&
        (q.max == null ? max == null : q.max.equals(max)) &&
        minInclusive==q.minInclusive &&
        maxInclusive==q.maxInclusive &&
        precisionStep==q.precisionStep &&
        getBoost()==q.getBoost()
      );
    }
    return false;
  }

  //@Override
  public final int hashCode() {
    int hash = Float.floatToIntBits(getBoost()) ^ field.hashCode();
    hash += precisionStep^0x64365465;
    if (min!=null) hash += min.hashCode()^0x14fa55fb;
    if (max!=null) hash += max.hashCode()^0x733fa5fe;
    return hash+
      (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
      (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
  }

  // TODO: Make this method accessible by *TrieRangeFilter,
  // can be removed, when moved to core.
  //@Override
  protected Filter getFilter() {
    return super.getFilter();
  }

  // members
  final String field;
  final int precisionStep;
  final Number min,max;
  final boolean minInclusive,maxInclusive;
}

@@ -1,61 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.Filter; // for javadocs
import org.apache.lucene.search.MultiTermQueryWrapperFilter;

/**
 * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
 * This filter depends on a specific structure of terms in the index that can only be created
 * by indexing via {@link IntTrieTokenStream} methods.
 * For more information on how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
 */
public class IntTrieRangeFilter extends MultiTermQueryWrapperFilter {

  /**
   * A trie filter for matching trie coded values using the given field name and
   * the default helper field.
   * <code>precisionStep</code> must be equal to, or a multiple of, the <code>precisionStep</code>
   * used for indexing the values.
   * You can leave the bounds open by supplying <code>null</code> for <code>min</code> and/or
   * <code>max</code>. Inclusive/exclusive bounds can also be supplied.
   * To filter float values use the converter {@link TrieUtils#floatToSortableInt}.
   */
  public IntTrieRangeFilter(final String field, final int precisionStep,
    final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
  ) {
    super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
  }

  /** Returns the field name for this filter */
  public String getField() { return ((IntTrieRangeQuery)query).getField(); }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesMin() { return ((IntTrieRangeQuery)query).includesMin(); }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesMax() { return ((IntTrieRangeQuery)query).includesMax(); }

  /** Returns the lower value of this range filter */
  public Integer getMin() { return ((IntTrieRangeQuery)query).getMin(); }

  /** Returns the upper value of this range filter */
  public Integer getMax() { return ((IntTrieRangeQuery)query).getMax(); }

}
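
// Illustrative usage (not part of the original file; the field name and searcher
// are hypothetical): the filter restricts any query to the given numeric range, e.g.
//   Filter f = new IntTrieRangeFilter("qty", 8, null, new Integer(50), true, true); // qty <= 50
//   TopDocs top = searcher.search(someQuery, f, 10);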

@@ -1,66 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.Query;

/**
 * Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats.
 * This query depends on a specific structure of terms in the index that can only be created
 * by indexing via {@link IntTrieTokenStream} methods.
 * <p>The query is in constant score mode by default. With precision steps of ≤4, this
 * query can be run in conventional boolean rewrite mode without changing the max clause count.
 * For more information on how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
 */
public class IntTrieRangeQuery extends AbstractTrieRangeQuery {

  /**
   * A trie query for matching trie coded values using the given field name and
   * the default helper field.
   * <code>precisionStep</code> must be equal to, or a multiple of, the <code>precisionStep</code>
   * used for indexing the values.
   * You can leave the bounds open by supplying <code>null</code> for <code>min</code> and/or
   * <code>max</code>. Inclusive/exclusive bounds can also be supplied.
   * To query float values use the converter {@link TrieUtils#floatToSortableInt}.
   */
  public IntTrieRangeQuery(final String field, final int precisionStep,
    final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
  ) {
    super(field,precisionStep,min,max,minInclusive,maxInclusive);
  }

  //@Override
  void passRanges(TrieRangeTermEnum enumerator) {
    // calculate the upper and lower bounds respecting the inclusive and null values.
    int minBound=(this.min==null) ? Integer.MIN_VALUE : (
      minInclusive ? this.min.intValue() : (this.min.intValue()+1)
    );
    int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
      maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
    );

    TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, minBound, maxBound);
  }

  /** Returns the lower value of this range query */
  public Integer getMin() { return (Integer)min; }

  /** Returns the upper value of this range query */
  public Integer getMax() { return (Integer)max; }

}
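
// Illustrative usage (not part of the original file; the field name is hypothetical):
//   Query q = new IntTrieRangeQuery("qty", 8,
//     new Integer(10), new Integer(100), true, true); // matches 10 <= qty <= 100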

@@ -1,172 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * This class provides a {@link TokenStream} for indexing <code>int</code> values
 * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
 * to be used in analyzers; it is more for iterating over the different precisions
 * during indexing of a specific numeric value.
 * <p>An <code>int</code> value is indexed as multiple string encoded terms, each reduced
 * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
 * <code>shift</code> value (number of bits removed) used during encoding.
 * <p>The number of bits removed from the right for each trie entry is called
 * <code>precisionStep</code> in this API. For a comparison of the different step values, see the
 * {@linkplain org.apache.lucene.search.trie package description}.
 * <p>The usage pattern is (it is recommended to switch off norms and term frequencies
 * for numeric fields; it does not make sense to have them):
 * <pre>
 *  Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
 *  field.setOmitNorms(true);
 *  field.setOmitTermFreqAndPositions(true);
 *  document.add(field);
 * </pre>
 * <p>For optimal performance, re-use the TokenStream and Field instance
 * for more than one document:
 * <pre>
 *  <em>// init</em>
 *  IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep);
 *  Field field = new Field(name, stream);
 *  field.setOmitNorms(true);
 *  field.setOmitTermFreqAndPositions(true);
 *  Document doc = new Document();
 *  doc.add(field);
 *  <em>// use this code to index many documents:</em>
 *  stream.setValue(value1);
 *  writer.addDocument(doc);
 *  stream.setValue(value2);
 *  writer.addDocument(doc);
 *  ...
 * </pre>
 * <p><em>Please note:</em> Token streams are read when the document is added to the index.
 * If you index more than one numeric field, use a separate instance for each.
 * <p>For more information on how trie fields work, see the
 * {@linkplain org.apache.lucene.search.trie package description}.
 */
public class IntTrieTokenStream extends TokenStream {

  /** The full precision token gets this token type assigned. */
  public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";

  /** Lower precision tokens get this token type assigned. */
  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";

  /**
   * Creates a token stream for indexing <code>value</code> with the given
   * <code>precisionStep</code>. As instance creation is a major cost,
   * consider using one {@link #IntTrieTokenStream(int)} instance for
   * indexing a large number of documents and assigning a value with
   * {@link #setValue} for each document.
   * To index float values use the converter {@link TrieUtils#floatToSortableInt}.
   */
  public IntTrieTokenStream(final int value, final int precisionStep) {
    if (precisionStep<1 || precisionStep>32)
      throw new IllegalArgumentException("precisionStep may only be 1..32");
    this.value = value;
    this.precisionStep = precisionStep;
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
    posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
    shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
  }

  /**
   * Creates a token stream for indexing values with the given
   * <code>precisionStep</code>. This stream is initially "empty"
   * (using a numeric value of 0); assign a value before indexing
   * each document using {@link #setValue}.
   */
  public IntTrieTokenStream(final int precisionStep) {
    this(0, precisionStep);
  }

  /**
   * Resets the token stream to deliver prefix encoded values
   * for <code>value</code>. Use this method to index the same
   * numeric field for a large number of documents and reuse the
   * current stream instance.
   * To index float values use the converter {@link TrieUtils#floatToSortableInt}.
   */
  public void setValue(final int value) {
    this.value = value;
    reset();
  }

  // @Override
  public void reset() {
    shift = 0;
  }

  // @Override
  public boolean incrementToken() {
    if (shift>=32) return false;
    final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
    termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
    shiftAtt.setShift(shift);
    if (shift==0) {
      typeAtt.setType(TOKEN_TYPE_FULL_PREC);
      posIncrAtt.setPositionIncrement(1);
    } else {
      typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
      posIncrAtt.setPositionIncrement(0);
    }
    shift += precisionStep;
    return true;
  }

  // @Override
  /** @deprecated */
  public Token next(final Token reusableToken) {
    if (shift>=32) return null;
    reusableToken.clear();
    final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
    reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
    if (shift==0) {
      reusableToken.setType(TOKEN_TYPE_FULL_PREC);
      reusableToken.setPositionIncrement(1);
    } else {
      reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
      reusableToken.setPositionIncrement(0);
    }
    shift += precisionStep;
    return reusableToken;
  }

  // @Override
  public String toString() {
    final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
    sb.append(",precisionStep=").append(precisionStep).append(')');
    return sb.toString();
  }

  // members
  private final TermAttribute termAtt;
  private final TypeAttribute typeAtt;
  private final PositionIncrementAttribute posIncrAtt;
  private final ShiftAttribute shiftAtt;

  private int shift = 0;
  private int value;
  private final int precisionStep;
}

@@ -1,61 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.Filter; // for javadocs
import org.apache.lucene.search.MultiTermQueryWrapperFilter;

/**
 * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
 * This filter depends on a specific structure of terms in the index that can only be created
 * by indexing via {@link LongTrieTokenStream} methods.
 * For more information on how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
 */
public class LongTrieRangeFilter extends MultiTermQueryWrapperFilter {

  /**
   * A trie filter for matching trie coded values using the given field name and
   * the default helper field.
   * <code>precisionStep</code> must be equal to, or a multiple of, the <code>precisionStep</code>
   * used for indexing the values.
   * You can leave the bounds open by supplying <code>null</code> for <code>min</code> and/or
   * <code>max</code>. Inclusive/exclusive bounds can also be supplied.
   * To filter double values use the converter {@link TrieUtils#doubleToSortableLong}.
   */
  public LongTrieRangeFilter(final String field, final int precisionStep,
    final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
  ) {
    super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
  }

  /** Returns the field name for this filter */
  public String getField() { return ((LongTrieRangeQuery)query).getField(); }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesMin() { return ((LongTrieRangeQuery)query).includesMin(); }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesMax() { return ((LongTrieRangeQuery)query).includesMax(); }

  /** Returns the lower value of this range filter */
  public Long getMin() { return ((LongTrieRangeQuery)query).getMin(); }

  /** Returns the upper value of this range filter */
  public Long getMax() { return ((LongTrieRangeQuery)query).getMax(); }

}

@@ -1,66 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.Query;

/**
 * Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles.
 * This query depends on a specific structure of terms in the index that can only be created
 * by indexing via {@link LongTrieTokenStream} methods.
 * <p>The query is in constant score mode by default. With precision steps of ≤4, this
 * query can be run in conventional boolean rewrite mode without changing the max clause count.
 * For more information on how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
 */
public class LongTrieRangeQuery extends AbstractTrieRangeQuery {

  /**
   * A trie query for matching trie coded values using the given field name and
   * the default helper field.
   * <code>precisionStep</code> must be equal to, or a multiple of, the <code>precisionStep</code>
   * used for indexing the values.
   * You can leave the bounds open by supplying <code>null</code> for <code>min</code> and/or
   * <code>max</code>. Inclusive/exclusive bounds can also be supplied.
   * To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
   */
  public LongTrieRangeQuery(final String field, final int precisionStep,
    final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
  ) {
    super(field,precisionStep,min,max,minInclusive,maxInclusive);
  }

  //@Override
  void passRanges(TrieRangeTermEnum enumerator) {
    // calculate the upper and lower bounds respecting the inclusive and null values.
    long minBound=(this.min==null) ? Long.MIN_VALUE : (
      minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
    );
    long maxBound=(this.max==null) ? Long.MAX_VALUE : (
      maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
    );

    TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound);
  }

  /** Returns the lower value of this range query */
  public Long getMin() { return (Long)min; }

  /** Returns the upper value of this range query */
  public Long getMax() { return (Long)max; }

}

@@ -1,172 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * This class provides a {@link TokenStream} for indexing <code>long</code> values
 * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
 * to be used in analyzers; it is more for iterating over the different precisions
 * during indexing of a specific numeric value.
 * <p>A <code>long</code> value is indexed as multiple string encoded terms, each reduced
 * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
 * <code>shift</code> value (number of bits removed) used during encoding.
 * <p>The number of bits removed from the right for each trie entry is called
 * <code>precisionStep</code> in this API. For a comparison of the different step values, see the
 * {@linkplain org.apache.lucene.search.trie package description}.
 * <p>The usage pattern is (it is recommended to switch off norms and term frequencies
 * for numeric fields; it does not make sense to have them):
 * <pre>
 *  Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
 *  field.setOmitNorms(true);
 *  field.setOmitTermFreqAndPositions(true);
 *  document.add(field);
 * </pre>
 * <p>For optimal performance, re-use the TokenStream and Field instance
 * for more than one document:
 * <pre>
 *  <em>// init</em>
 *  LongTrieTokenStream stream = new LongTrieTokenStream(precisionStep);
 *  Field field = new Field(name, stream);
 *  field.setOmitNorms(true);
 *  field.setOmitTermFreqAndPositions(true);
 *  Document doc = new Document();
 *  doc.add(field);
 *  <em>// use this code to index many documents:</em>
 *  stream.setValue(value1);
 *  writer.addDocument(doc);
 *  stream.setValue(value2);
 *  writer.addDocument(doc);
 *  ...
 * </pre>
 * <p><em>Please note:</em> Token streams are read when the document is added to the index.
 * If you index more than one numeric field, use a separate instance for each.
 * <p>For more information on how trie fields work, see the
 * {@linkplain org.apache.lucene.search.trie package description}.
 */
public class LongTrieTokenStream extends TokenStream {

  /** The full precision token gets this token type assigned. */
  public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";

  /** Lower precision tokens get this token type assigned. */
  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";

  /**
   * Creates a token stream for indexing <code>value</code> with the given
   * <code>precisionStep</code>. As instance creation is a major cost,
   * consider using one {@link #LongTrieTokenStream(int)} instance for
   * indexing a large number of documents and assigning a value with
   * {@link #setValue} for each document.
   * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
   */
  public LongTrieTokenStream(final long value, final int precisionStep) {
    if (precisionStep<1 || precisionStep>64)
      throw new IllegalArgumentException("precisionStep may only be 1..64");
    this.value = value;
    this.precisionStep = precisionStep;
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
    posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
    shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
  }

  /**
   * Creates a token stream for indexing values with the given
   * <code>precisionStep</code>. This stream is initially "empty"
   * (using a numeric value of 0); assign a value before indexing
   * each document using {@link #setValue}.
   */
  public LongTrieTokenStream(final int precisionStep) {
    this(0L, precisionStep);
  }

  /**
   * Resets the token stream to deliver prefix encoded values
   * for <code>value</code>. Use this method to index the same
   * numeric field for a large number of documents and reuse the
   * current stream instance.
   * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
   */
  public void setValue(final long value) {
    this.value = value;
    reset();
  }

  // @Override
  public void reset() {
    shift = 0;
  }

  // @Override
  public boolean incrementToken() {
    if (shift>=64) return false;
    final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
    termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
    shiftAtt.setShift(shift);
    if (shift==0) {
      typeAtt.setType(TOKEN_TYPE_FULL_PREC);
      posIncrAtt.setPositionIncrement(1);
    } else {
      typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
      posIncrAtt.setPositionIncrement(0);
    }
    shift += precisionStep;
    return true;
  }

  // @Override
  /** @deprecated */
  public Token next(final Token reusableToken) {
    if (shift>=64) return null;
    reusableToken.clear();
    final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
    reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
    if (shift==0) {
      reusableToken.setType(TOKEN_TYPE_FULL_PREC);
      reusableToken.setPositionIncrement(1);
    } else {
      reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
      reusableToken.setPositionIncrement(0);
    }
    shift += precisionStep;
    return reusableToken;
  }

  // @Override
  public String toString() {
    final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
    sb.append(",precisionStep=").append(precisionStep).append(')');
    return sb.toString();
  }

  // members
  private final TermAttribute termAtt;
  private final TypeAttribute typeAtt;
  private final PositionIncrementAttribute posIncrAtt;
  private final ShiftAttribute shiftAtt;

  private int shift = 0;
  private long value;
  private final int precisionStep;
}

@@ -1,70 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.Attribute;

import java.io.Serializable;

/**
 * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
 * to the shift value of the current prefix-encoded token.
 * It may be used by filters or consumers to e.g. distribute the values to various fields.
 */
public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
  private int shift = 0;

  /**
   * Returns the shift value of the current prefix encoded token.
   */
  public int getShift() {
    return shift;
  }

  /**
   * Sets the shift value.
   */
  public void setShift(final int shift) {
    this.shift = shift;
  }

  public void clear() {
    shift = 0;
  }

  public String toString() {
    return "shift=" + shift;
  }

  public boolean equals(Object other) {
    if (this == other) return true;
    if (other instanceof ShiftAttribute) {
      return ((ShiftAttribute) other).shift == shift;
    }
    return false;
  }

  public int hashCode() {
    return shift;
  }

  public void copyTo(Attribute target) {
    final ShiftAttribute t = (ShiftAttribute) target;
    t.setShift(shift);
  }
}
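
// Illustrative consumer sketch (not part of the original file): a custom
// TokenFilter could read the shift of each trie token, e.g. to route
// lower-precision terms to a separate helper field:
//
//   ShiftAttribute shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
//   ...
//   if (shiftAtt.getShift() > 0) {
//     // current token is a lower-precision (prefix) term
//   }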

@@ -1,140 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.LinkedList;

import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery; // for javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;

/**
 * Subclass of FilteredTermEnum for enumerating all terms that match the
 * sub-ranges for trie range queries.
 * <p>
 * WARNING: Term enumeration is not guaranteed to always be ordered by
 * {@link Term#compareTo}.
 * The ordering depends on how {@link TrieUtils#splitLongRange} and
 * {@link TrieUtils#splitIntRange} generate the sub-ranges. For
 * {@link MultiTermQuery}, ordering is not relevant.
 */
final class TrieRangeTermEnum extends FilteredTermEnum {

  private final AbstractTrieRangeQuery query;
  private final IndexReader reader;
  private final LinkedList/*<String>*/ rangeBounds = new LinkedList/*<String>*/();
  private String currentUpperBound = null;

  TrieRangeTermEnum(AbstractTrieRangeQuery query, IndexReader reader) {
    this.query = query;
    this.reader = reader;
  }

  /** Returns a range builder that must be used to feed in the sub-ranges. */
  TrieUtils.IntRangeBuilder getIntRangeBuilder() {
    return new TrieUtils.IntRangeBuilder() {
      //@Override
      public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
        rangeBounds.add(minPrefixCoded);
        rangeBounds.add(maxPrefixCoded);
      }
    };
  }

  /** Returns a range builder that must be used to feed in the sub-ranges. */
  TrieUtils.LongRangeBuilder getLongRangeBuilder() {
    return new TrieUtils.LongRangeBuilder() {
      //@Override
      public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
        rangeBounds.add(minPrefixCoded);
        rangeBounds.add(maxPrefixCoded);
      }
    };
  }

  /** After feeding the range builder, call this method to initialize the enum. */
  void init() throws IOException {
    next();
  }

  //@Override
  public float difference() {
    return 1.0f;
  }

  /** This is a dummy; it is not used by this class. */
  //@Override
  protected boolean endEnum() {
    assert false; // should never be called
    return (currentTerm != null);
  }

  /**
   * Compares if the current upper bound is reached;
   * this also updates the term count for statistics.
   * In contrast to {@link FilteredTermEnum}, a return value
   * of <code>false</code> ends iterating the current enum
   * and forwards to the next sub-range.
   */
  //@Override
  protected boolean termCompare(Term term) {
    return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0);
  }

  /** Increments the enumeration to the next element. True if one exists. */
  //@Override
  public boolean next() throws IOException {
    // if a current term exists, the actual enum is initialized:
    // try to change to the next term; if no such term exists, fall through
    if (currentTerm != null) {
      assert actualEnum!=null;
      if (actualEnum.next()) {
        currentTerm = actualEnum.term();
        if (termCompare(currentTerm)) return true;
      }
    }
    // if all of the above fails, we go forward to the next enum,
    // if one is available
    currentTerm = null;
    if (rangeBounds.size() < 2) return false;
    // close the current enum and read the next bounds
    if (actualEnum != null) {
      actualEnum.close();
      actualEnum = null;
    }
    final String lowerBound = (String)rangeBounds.removeFirst();
    this.currentUpperBound = (String)rangeBounds.removeFirst();
    // this call recursively uses next(), if no valid term in
    // the next enum is found.
    // if this behavior is changed/modified in the superclass,
    // this enum will not work anymore!
    setEnum(reader.terms(new Term(query.field, lowerBound)));
    return (currentTerm != null);
  }

  /** Closes the enumeration to further activity, freeing resources. */
  //@Override
  public void close() throws IOException {
    rangeBounds.clear();
    currentUpperBound = null;
    super.close();
  }

}

@@ -1,446 +0,0 @@
package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.ExtendedFieldCache;

/**
 * This is a helper class to generate prefix-encoded representations for numerical values
 * and supplies converters to represent float/double values as sortable integers/longs.
 * <p>To quickly execute range queries in Apache Lucene, a range is divided recursively
 * into multiple intervals for searching: The center of the range is searched only with
 * the lowest possible precision in the trie, while the boundaries are matched
 * more exactly. This reduces the number of terms dramatically.
 * <p>This class generates terms to achieve this: First the numerical integer values need to
 * be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned
 * and the bits are converted to ASCII chars, 7 bits per char. The resulting string is
 * sortable like the original integer value. Each value is also prefixed
 * (in the first char) by the <code>shift</code> value (number of bits removed) used
 * during encoding.
 * <p>To also index floating point numbers, this class supplies two methods to convert them
 * to integer values by changing their bit layout: {@link #doubleToSortableLong},
 * {@link #floatToSortableInt}. You will have no precision loss by
 * converting floating point numbers to integers and back (only that the integer form
 * is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
 * date to long: {@link java.util.Date#getTime}).
 * <p>Prefix encoded fields can also be sorted using the {@link SortField} factories
 * {@link #getLongSortField} or {@link #getIntSortField}.
 */
public final class TrieUtils {

  private TrieUtils() {} // no instance!

  /**
   * Longs are stored at lower precision by shifting off lower bits. The shift count is
   * stored as <code>SHIFT_START_LONG+shift</code> in the first character.
   */
  public static final char SHIFT_START_LONG = (char)0x20;

  /** internal: maximum needed <code>char[]</code> buffer size for encoding */
  static final int LONG_BUF_SIZE = 63/7 + 2;

  /**
   * Integers are stored at lower precision by shifting off lower bits. The shift count is
   * stored as <code>SHIFT_START_INT+shift</code> in the first character.
   */
  public static final char SHIFT_START_INT = (char)0x60;

  /** internal: maximum needed <code>char[]</code> buffer size for encoding */
  static final int INT_BUF_SIZE = 31/7 + 2;

  /**
   * A parser instance for filling an {@link ExtendedFieldCache} that parses prefix encoded fields as longs.
   */
  public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
    public final long parseLong(final String val) {
      final int shift = val.charAt(0)-SHIFT_START_LONG;
      if (shift>0 && shift<=63)
        throw new FieldCache.StopFillCacheException();
      return prefixCodedToLong(val);
    }
  };

  /**
   * A parser instance for filling a {@link FieldCache} that parses prefix encoded fields as ints.
   */
  public static final FieldCache.IntParser FIELD_CACHE_INT_PARSER=new FieldCache.IntParser(){
    public final int parseInt(final String val) {
      final int shift = val.charAt(0)-SHIFT_START_INT;
      if (shift>0 && shift<=31)
        throw new FieldCache.StopFillCacheException();
      return prefixCodedToInt(val);
    }
  };

  /**
   * A parser instance for filling an {@link ExtendedFieldCache} that parses prefix encoded fields as doubles.
   * This uses {@link #sortableLongToDouble} to convert the encoded long to a double.
   */
  public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
    public final double parseDouble(final String val) {
      final int shift = val.charAt(0)-SHIFT_START_LONG;
      if (shift>0 && shift<=63)
        throw new FieldCache.StopFillCacheException();
      return sortableLongToDouble(prefixCodedToLong(val));
    }
  };

  /**
   * A parser instance for filling a {@link FieldCache} that parses prefix encoded fields as floats.
   * This uses {@link #sortableIntToFloat} to convert the encoded int to a float.
   */
  public static final FieldCache.FloatParser FIELD_CACHE_FLOAT_PARSER=new FieldCache.FloatParser(){
    public final float parseFloat(final String val) {
      final int shift = val.charAt(0)-SHIFT_START_INT;
      if (shift>0 && shift<=31)
        throw new FieldCache.StopFillCacheException();
      return sortableIntToFloat(prefixCodedToInt(val));
    }
  };

  /** internal */
  static int longToPrefixCoded(final long val, final int shift, final char[] buffer) {
    int nChars = (63-shift)/7 + 1, len = nChars+1;
    buffer[0] = (char)(SHIFT_START_LONG + shift);
    long sortableBits = val ^ 0x8000000000000000L;
    sortableBits >>>= shift;
    while (nChars>=1) {
      // Store 7 bits per character for good efficiency when UTF-8 encoding.
      // The whole number is right-justified so that lucene can prefix-encode
      // the terms more efficiently.
      buffer[nChars--] = (char)(sortableBits & 0x7f);
      sortableBits >>>= 7;
    }
    return len;
  }

  /**
   * This is a convenience method that returns prefix coded bits of a long without
   * reducing the precision. It can be used to store the full precision value as a
   * stored field in the index.
   * <p>To decode, use {@link #prefixCodedToLong}.
   */
  public static String longToPrefixCoded(final long val) {
    return longToPrefixCoded(val, 0);
  }

  /**
   * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
   * This method is used by {@link LongRangeBuilder}.
   */
  public static String longToPrefixCoded(final long val, final int shift) {
    if (shift>63 || shift<0)
      throw new IllegalArgumentException("Illegal shift value, must be 0..63");
    final char[] buffer = new char[LONG_BUF_SIZE];
    final int len = longToPrefixCoded(val, shift, buffer);
    return new String(buffer, 0, len);
  }

  /** internal */
  static int intToPrefixCoded(final int val, final int shift, final char[] buffer) {
    int nChars = (31-shift)/7 + 1, len = nChars+1;
    buffer[0] = (char)(SHIFT_START_INT + shift);
    int sortableBits = val ^ 0x80000000;
    sortableBits >>>= shift;
    while (nChars>=1) {
      // Store 7 bits per character for good efficiency when UTF-8 encoding.
      // The whole number is right-justified so that lucene can prefix-encode
      // the terms more efficiently.
      buffer[nChars--] = (char)(sortableBits & 0x7f);
      sortableBits >>>= 7;
    }
    return len;
  }

  /**
   * This is a convenience method that returns prefix coded bits of an int without
   * reducing the precision. It can be used to store the full precision value as a
   * stored field in the index.
   * <p>To decode, use {@link #prefixCodedToInt}.
   */
  public static String intToPrefixCoded(final int val) {
    return intToPrefixCoded(val, 0);
  }

  /**
   * Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
   * This method is used by {@link IntRangeBuilder}.
   */
  public static String intToPrefixCoded(final int val, final int shift) {
    if (shift>31 || shift<0)
      throw new IllegalArgumentException("Illegal shift value, must be 0..31");
    final char[] buffer = new char[INT_BUF_SIZE];
    final int len = intToPrefixCoded(val, shift, buffer);
    return new String(buffer, 0, len);
  }
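
  // Illustrative invariants (not part of the original file), implied by the
  // encoder above and the decoder below:
  //   prefixCodedToInt(intToPrefixCoded(x)) == x  // full-precision round trip
  //   intToPrefixCoded(x, shift).charAt(0) == (char)(SHIFT_START_INT + shift)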
|
||||
|
||||
/**
|
||||
* Returns a long from prefixCoded characters.
|
||||
* Rightmost bits will be zero for lower precision codes.
|
||||
* This method can be used to decode e.g. a stored field.
|
||||
* @throws NumberFormatException if the supplied string is
|
||||
* not correctly prefix encoded.
|
||||
* @see #longToPrefixCoded(long)
|
||||
*/
|
||||
public static long prefixCodedToLong(final String prefixCoded) {
|
||||
final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG;
|
||||
if (shift>63 || shift<0)
|
||||
throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)");
|
||||
long sortableBits = 0L;
|
||||
for (int i=1, len=prefixCoded.length(); i<len; i++) {
|
||||
sortableBits <<= 7;
|
||||
final char ch = prefixCoded.charAt(i);
|
||||
if (ch>0x7f) {
|
||||
throw new NumberFormatException(
|
||||
"Invalid prefixCoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
}
|
||||
sortableBits |= (long)ch;
|
||||
}
|
||||
return (sortableBits << shift) ^ 0x8000000000000000L;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an int from prefixCoded characters.
|
||||
* Rightmost bits will be zero for lower precision codes.
|
||||
* This method can be used to decode e.g. a stored field.
|
||||
* @throws NumberFormatException if the supplied string is
|
||||
* not correctly prefix encoded.
|
||||
* @see #intToPrefixCoded(int)
|
||||
*/
|
||||
public static int prefixCodedToInt(final String prefixCoded) {
|
||||
final int shift = prefixCoded.charAt(0)-SHIFT_START_INT;
|
||||
if (shift>31 || shift<0)
|
||||
throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)");
|
||||
int sortableBits = 0;
|
||||
for (int i=1, len=prefixCoded.length(); i<len; i++) {
|
||||
sortableBits <<= 7;
|
||||
final char ch = prefixCoded.charAt(i);
|
||||
if (ch>0x7f) {
|
||||
throw new NumberFormatException(
|
||||
"Invalid prefixCoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
}
|
||||
sortableBits |= (int)ch;
|
||||
}
|
||||
return (sortableBits << shift) ^ 0x80000000;
|
||||
}
|
||||
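// Illustrative sketch (not part of the original class): encode/decode round-trip.
// At full precision decoding restores the value exactly; with a non-zero shift
// the rightmost bits are zeroed on decode:
//   String full = intToPrefixCoded(123456);      // shift = 0
//   int back    = prefixCodedToInt(full);        // back == 123456
//   String low  = intToPrefixCoded(123456, 8);   // precision reduced by 8 bits
//   int coarse  = prefixCodedToInt(low);         // coarse == (123456 & ~0xFF)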
|
||||
/**
|
||||
* Converts a <code>double</code> value to a sortable signed <code>long</code>.
|
||||
* The value is converted by taking its IEEE 754 floating-point "double format"
|
||||
* bit layout and then swapping some bits, so that the result can be compared as a long.
|
||||
* The precision is not reduced, but the value can easily be used as a long.
|
||||
* @see #sortableLongToDouble
|
||||
*/
|
||||
public static long doubleToSortableLong(double val) {
|
||||
long f = Double.doubleToLongBits(val);
|
||||
if (f<0) f ^= 0x7fffffffffffffffL;
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a sortable <code>long</code> back to a <code>double</code>.
|
||||
* @see #doubleToSortableLong
|
||||
*/
|
||||
public static double sortableLongToDouble(long val) {
|
||||
if (val<0) val ^= 0x7fffffffffffffffL;
|
||||
return Double.longBitsToDouble(val);
|
||||
}
|
||||
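// Illustrative sketch (not part of the original class): the bit swapping keeps
// numeric order when the results are compared as signed longs, e.g.
//   doubleToSortableLong(-1.5) < doubleToSortableLong(-1.0)
//     < doubleToSortableLong(0.0) < doubleToSortableLong(2.0)
// and the conversion round-trips without loss:
//   sortableLongToDouble(doubleToSortableLong(x)) == x   (for any finite x)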
|
||||
/**
|
||||
* Converts a <code>float</code> value to a sortable signed <code>int</code>.
|
||||
* The value is converted by taking its IEEE 754 floating-point "float format"
|
||||
* bit layout and then swapping some bits, so that the result can be compared as an int.
|
||||
* The precision is not reduced, but the value can easily be used as an int.
|
||||
* @see #sortableIntToFloat
|
||||
*/
|
||||
public static int floatToSortableInt(float val) {
|
||||
int f = Float.floatToIntBits(val);
|
||||
if (f<0) f ^= 0x7fffffff;
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a sortable <code>int</code> back to a <code>float</code>.
|
||||
* @see #floatToSortableInt
|
||||
*/
|
||||
public static float sortableIntToFloat(int val) {
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return Float.intBitsToFloat(val);
|
||||
}
|
||||
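// Illustrative sketch (not part of the original class): the same ordering
// guarantee holds for the float/int variant, e.g.
//   floatToSortableInt(-2.5f) < floatToSortableInt(0.0f) < floatToSortableInt(1.5f)
// and sortableIntToFloat(floatToSortableInt(x)) == x for any finite float x.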
|
||||
/** A factory method that generates a {@link SortField} instance for sorting prefix-encoded long values. */
|
||||
public static SortField getLongSortField(final String field, final boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse);
|
||||
}
|
||||
|
||||
/** A factory method that generates a {@link SortField} instance for sorting prefix-encoded int values. */
|
||||
public static SortField getIntSortField(final String field, final boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_INT_PARSER, reverse);
|
||||
}
|
||||
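// Illustrative sketch (hypothetical usage, as exercised by the unit tests):
// sort results descending by a trie-encoded long field:
//   Sort sort = new Sort(TrieUtils.getLongSortField("exampleLong", true));
//   TopDocs docs = searcher.search(query, null, 10, sort);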
|
||||
/**
|
||||
* Expert: Splits a long range recursively.
|
||||
* You may implement a builder that adds clauses to a
|
||||
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
|
||||
* {@link LongRangeBuilder#addRange(String,String)}
|
||||
* method.
|
||||
* <p>This method is used by {@link LongTrieRangeQuery}.
|
||||
*/
|
||||
public static void splitLongRange(final LongRangeBuilder builder,
|
||||
final int precisionStep, final long minBound, final long maxBound
|
||||
) {
|
||||
if (precisionStep<1 || precisionStep>64)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
||||
splitRange(builder, 64, precisionStep, minBound, maxBound);
|
||||
}
|
||||
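// Illustrative sketch (hypothetical usage, not part of the original class):
// collect the split sub-ranges as (inclusive) RangeQuery clauses, similar to
// what LongTrieRangeQuery does internally:
//   final BooleanQuery bq = new BooleanQuery(true);
//   TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
//     public void addRange(String minPrefixCoded, String maxPrefixCoded) {
//       bq.add(new RangeQuery("exampleLong", minPrefixCoded, maxPrefixCoded, true, true),
//           BooleanClause.Occur.SHOULD);
//     }
//   }, 8 /* precisionStep */, 123L, 999999L);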
|
||||
/**
|
||||
* Expert: Splits an int range recursively.
|
||||
* You may implement a builder that adds clauses to a
|
||||
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
|
||||
* {@link IntRangeBuilder#addRange(String,String)}
|
||||
* method.
|
||||
* <p>This method is used by {@link IntTrieRangeQuery}.
|
||||
*/
|
||||
public static void splitIntRange(final IntRangeBuilder builder,
|
||||
final int precisionStep, final int minBound, final int maxBound
|
||||
) {
|
||||
if (precisionStep<1 || precisionStep>32)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
||||
splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
|
||||
}
|
||||
|
||||
/** This helper does the splitting for both 32 and 64 bit. */
|
||||
private static void splitRange(
|
||||
final Object builder, final int valSize,
|
||||
final int precisionStep, long minBound, long maxBound
|
||||
) {
|
||||
if (minBound > maxBound) return;
|
||||
for (int shift=0; ; shift += precisionStep) {
|
||||
// calculate new bounds for inner precision
|
||||
final long diff = 1L << (shift+precisionStep),
|
||||
mask = ((1L<<precisionStep) - 1L) << shift;
|
||||
final boolean
|
||||
hasLower = (minBound & mask) != 0L,
|
||||
hasUpper = (maxBound & mask) != mask;
|
||||
final long
|
||||
nextMinBound = (hasLower ? (minBound + diff) : minBound) & ~mask,
|
||||
nextMaxBound = (hasUpper ? (maxBound - diff) : maxBound) & ~mask;
|
||||
|
||||
if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound) {
|
||||
// We are in the lowest precision or the next precision is not available.
|
||||
addRange(builder, valSize, minBound, maxBound, shift);
|
||||
// exit the split recursion loop
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasLower)
|
||||
addRange(builder, valSize, minBound, minBound | mask, shift);
|
||||
if (hasUpper)
|
||||
addRange(builder, valSize, maxBound & ~mask, maxBound, shift);
|
||||
|
||||
// recurse to next precision
|
||||
minBound = nextMinBound;
|
||||
maxBound = nextMaxBound;
|
||||
}
|
||||
}
|
||||
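// Illustrative trace (assumption: valSize=64, precisionStep=8, range [10, 1000]):
//   shift 0: emits the exact edge ranges [10, 255] and [768, 1000];
//   shift 8: emits the aligned middle [256, 767] with the low 8 bits stripped,
//            i.e. the two low-precision terms for 1 and 2.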
|
||||
/** Helper that delegates to correct range builder */
|
||||
private static void addRange(
|
||||
final Object builder, final int valSize,
|
||||
long minBound, long maxBound,
|
||||
final int shift
|
||||
) {
|
||||
// for the max bound set all lower bits (that were shifted away):
|
||||
// this is important for testing or other usages of the split range
|
||||
// (e.g. to reconstruct the full range). The prefixEncoding will remove
|
||||
// the bits anyway, so they do not hurt!
|
||||
maxBound |= (1L << shift) - 1L;
|
||||
// delegate to correct range builder
|
||||
switch(valSize) {
|
||||
case 64:
|
||||
((LongRangeBuilder)builder).addRange(minBound, maxBound, shift);
|
||||
break;
|
||||
case 32:
|
||||
((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift);
|
||||
break;
|
||||
default:
|
||||
// Should not happen!
|
||||
throw new IllegalArgumentException("valSize must be 32 or 64.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Callback for {@link #splitLongRange}.
|
||||
* You need to override only one of the methods.
|
||||
* <p><font color="red">WARNING: This is a very low-level interface,
|
||||
* the method signatures may change in later versions.</font>
|
||||
*/
|
||||
public static abstract class LongRangeBuilder {
|
||||
|
||||
/**
|
||||
* Override this method if you want to receive the already prefix-encoded range bounds.
|
||||
* You can directly build classical (inclusive) range queries from them.
|
||||
*/
|
||||
public void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Override this method if you want to receive the raw long range bounds.
|
||||
* You can use this e.g. for debugging purposes (print out range bounds).
|
||||
*/
|
||||
public void addRange(final long min, final long max, final int shift) {
|
||||
addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Callback for {@link #splitIntRange}.
|
||||
* You need to override only one of the methods.
|
||||
* <p><font color="red">WARNING: This is a very low-level interface,
|
||||
* the method signatures may change in later versions.</font>
|
||||
*/
|
||||
public static abstract class IntRangeBuilder {
|
||||
|
||||
/**
|
||||
* Override this method if you want to receive the already prefix-encoded range bounds.
|
||||
* You can directly build classical (inclusive) range queries from them.
|
||||
*/
|
||||
public void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Override this method if you want to receive the raw int range bounds.
|
||||
* You can use this e.g. for debugging purposes (print out range bounds).
|
||||
*/
|
||||
public void addRange(final int min, final int max, final int shift) {
|
||||
addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,141 +0,0 @@
|
|||
<html>
|
||||
<body>
|
||||
<p>This package provides fast numeric range queries/filters on <code>long</code>, <code>double</code>, <code>int</code>,
|
||||
or <code>float</code> (and other data types that can be converted to numerical values) fields based on trie structures.</p>
|
||||
|
||||
<h3>How it works</h3>
|
||||
<p>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>, where this algorithm was described:
|
||||
|
||||
<blockquote><strong>Schindler, U, Diepenbroek, M</strong>, 2008. <em>Generic XML-based Framework for Metadata Portals.</em>
|
||||
Computers &amp; Geosciences 34 (12), 1947-1955.
|
||||
<a href="http://dx.doi.org/10.1016/j.cageo.2008.02.023" target="_blank">doi:10.1016/j.cageo.2008.02.023</a></blockquote>
|
||||
|
||||
<p><em>A quote from this paper:</em> Because Apache Lucene is a full-text search engine and not a conventional database,
|
||||
it cannot handle numerical ranges (e.g., field value is inside user defined bounds, even dates are numerical values).
|
||||
We have developed an extension to Apache Lucene that stores
|
||||
the numerical values in a special string-encoded format with variable precision
|
||||
(all numerical values like doubles, longs, floats, and ints are converted to lexicographic sortable string representations
|
||||
and stored with different precisions). For a more detailed description of how the values are stored,
|
||||
see {@link org.apache.lucene.search.trie.TrieUtils}. A range is then divided recursively into
|
||||
multiple intervals for searching:
|
||||
The center of the range is searched only with the lowest possible precision in the trie,
|
||||
while the boundaries are matched more exactly. This reduces the number of terms dramatically.</p>
|
||||
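<p><em>(a simplified sketch of the indexing side, assuming a hypothetical <code>addTerm</code>
helper: each value is indexed once per precision, the shift growing by <code>precisionStep</code>)</em></p>

<pre>
  for (int shift = 0; shift &lt; 64; shift += precisionStep)
    addTerm(TrieUtils.longToPrefixCoded(value, shift));
</pre>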
|
||||
<p>For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that
|
||||
uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the
|
||||
lowest precision. Overall, a range could consist of a theoretical maximum of
|
||||
<code>7*255*2 + 255 = 3825</code> distinct terms (when there is a term for every distinct value of an
|
||||
8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used
|
||||
because it would always be possible to reduce the full 256 values to one term with degraded precision).
|
||||
In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records
|
||||
and a uniform value distribution).</p>
|
||||
|
||||
<p>You can choose any <code>precisionStep</code> when encoding integer values.
|
||||
Lower step values mean more precisions and thus more terms in the index (so the index gets larger).
|
||||
On the other hand, the maximum number of terms to match is reduced, which optimizes query speed.
|
||||
The formula to calculate the maximum term count is:
|
||||
<pre>
|
||||
n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1) * 2 ] + (2^precisionStep - 1)
|
||||
</pre>
|
||||
<p><em>(this formula is only correct when <code>bitsPerValue/precisionStep</code> is an integer;
|
||||
in other cases, the value must be rounded up and the last summand must use the remainder of the division as the
|
||||
precision step)</em>.
|
||||
For longs stored using a precision step of 4, <code>n = 15*15*2 + 15 = 465</code>, and for a precision
|
||||
step of 2, <code>n = 31*3*2 + 3 = 189</code>. However, the gain in search speed is offset by more seeking
|
||||
in the term enum of the index. Because of this, the ideal <code>precisionStep</code> value can only
|
||||
be determined by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
|
||||
using a multiple of the original step value.</p>
|
||||
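<p><em>(worked example: for <code>int</code> values with a precision step of 8,
<code>n = (32/8 - 1) * 255 * 2 + 255 = 1785</code>; this is the clause count the
unit tests pass to <code>BooleanQuery.setMaxClauseCount</code>)</em></p>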
|
||||
<p>This dramatically improves the performance of Apache Lucene with range queries, which
|
||||
are no longer dependent on the index size and the number of distinct values because there is
|
||||
an upper limit unrelated to either of these properties.</p>
|
||||
|
||||
<h3>Indexing Usage</h3>
|
||||
<p>To use the new query types, the numerical values, which may be <code>long</code>, <code>double</code>, <code>int</code>,
|
||||
<code>float</code>, or <code>Date</code>, must be indexed in a special prefix-encoded format
|
||||
using {@link org.apache.lucene.search.trie.LongTrieTokenStream} or
|
||||
{@link org.apache.lucene.search.trie.IntTrieTokenStream}, which generate the necessary tokens.
|
||||
Use {@link org.apache.lucene.search.trie.TrieUtils} to convert floating point values to integers.
|
||||
Example code for indexing (it is recommended to disable norms and term frequencies when indexing
|
||||
trie-encoded fields):</p>
|
||||
|
||||
<pre>
|
||||
<em>// choose a step value; 8 is a generally good value for large indexes:</em>
|
||||
int precisionStep = 8;
|
||||
|
||||
Document doc = new Document();
|
||||
|
||||
<em>// add some standard fields:</em>
|
||||
String svalue = "anything to index";
|
||||
doc.add(new Field("exampleString", svalue, Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
||||
<em>// add some numerical fields:</em>
|
||||
long lvalue = 121345L;
|
||||
Field f = new Field("exampleLong", new LongTrieTokenStream(lvalue, precisionStep));
|
||||
f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
double dvalue = 1.057E17;
|
||||
f = new Field("exampleDouble", new LongTrieTokenStream(TrieUtils.doubleToSortableLong(dvalue), precisionStep));
|
||||
f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
int ivalue = 121345;
|
||||
f = new Field("exampleInt", new IntTrieTokenStream(ivalue, precisionStep));
|
||||
f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
float fvalue = 1.057E17f;
|
||||
f = new Field("exampleFloat", new IntTrieTokenStream(TrieUtils.floatToSortableInt(fvalue), precisionStep));
|
||||
f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
Date datevalue = new Date(); <em>// current time</em>
|
||||
f = new Field("exampleDate", new LongTrieTokenStream(datevalue.getTime(), precisionStep));
|
||||
f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
|
||||
<em>// if you want to also store one of the values:</em>
|
||||
doc.add(new Field("exampleLong", Long.toString(lvalue), Field.Store.YES, Field.Index.NO));
|
||||
|
||||
<em>// or as encoded value:</em>
|
||||
doc.add(new Field("exampleLong2", TrieUtils.longToPrefixCoded(lvalue), Field.Store.YES, Field.Index.NO));
|
||||
|
||||
<em>// now add document to IndexWriter, as usual</em>
|
||||
</pre>
|
||||
|
||||
<p><em>(for higher indexing performance, you can reuse the TokenStreams –
|
||||
more info about this can be found in the stream documentation)</em></p>
|
||||
|
||||
<h3>Searching</h3>
|
||||
|
||||
<p>The numeric index fields you prepared in this way can be searched by
|
||||
{@link org.apache.lucene.search.trie.LongTrieRangeQuery} or {@link org.apache.lucene.search.trie.IntTrieRangeQuery}:</p>
|
||||
|
||||
<pre>
|
||||
<em>// Java 1.4, because Long.valueOf(long) is not available:</em>
|
||||
Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
|
||||
|
||||
<em>// OR, Java 1.5, using autoboxing:</em>
|
||||
Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true);
|
||||
|
||||
<em>// execute the search, as usual:</em>
|
||||
TopDocs docs = searcher.search(q, 10);
|
||||
for (int i = 0; i<docs.scoreDocs.length; i++) {
|
||||
Document doc = searcher.doc(docs.scoreDocs[i].doc);
|
||||
System.out.println(doc.get("exampleString"));
|
||||
|
||||
<em>// decode a prefix coded, stored field:</em>
|
||||
System.out.println(TrieUtils.prefixCodedToLong(doc.get("exampleLong2")));
|
||||
}
|
||||
</pre>
|
||||
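<p><em>(a hedged sketch: the same range can also be expressed as a filter, using the
<code>LongTrieRangeFilter</code> class from this package and the searcher from above)</em></p>

<pre>
  Filter f = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
  TopDocs docs = searcher.search(new MatchAllDocsQuery(), f, 10);
</pre>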
|
||||
<h3>Performance</h3>
|
||||
|
||||
<p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
|
||||
that the old {@link org.apache.lucene.search.RangeQuery} (with raised
|
||||
{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
|
||||
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
|
||||
{@link org.apache.lucene.search.trie.LongTrieRangeQuery} took &lt;100ms to
|
||||
complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
|
||||
This query type was developed for a geographic portal, where the performance for
|
||||
e.g. bounding boxes or exact date/time stamps is important.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -1,402 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.RangeQuery;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestIntTrieRangeQuery extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static final int distance = 6666;
|
||||
// shift the start of the values to the left, to also get negative values:
|
||||
private static final int startOffset = - 1 << 15;
|
||||
// number of docs to generate for testing
|
||||
private static final int noDocs = 10000;
|
||||
|
||||
private static Field newField(String name, int precisionStep) {
|
||||
IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep);
|
||||
stream.setUseNewAPI(true);
|
||||
Field f=new Field(name, stream);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setOmitNorms(true);
|
||||
return f;
|
||||
}
|
||||
|
||||
private static final RAMDirectory directory;
|
||||
private static final IndexSearcher searcher;
|
||||
static {
|
||||
try {
|
||||
// set the theoretical maximum term count for 8bit (see docs for the number)
|
||||
BooleanQuery.setMaxClauseCount(3*255*2 + 255);
|
||||
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, MaxFieldLength.UNLIMITED);
|
||||
|
||||
Field
|
||||
field8 = newField("field8", 8),
|
||||
field4 = newField("field4", 4),
|
||||
field2 = newField("field2", 2),
|
||||
ascfield8 = newField("ascfield8", 8),
|
||||
ascfield4 = newField("ascfield4", 4),
|
||||
ascfield2 = newField("ascfield2", 2);
|
||||
|
||||
// Add a series of noDocs docs with increasing int values
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
Document doc=new Document();
|
||||
// add fields, that have a distance to test general functionality
|
||||
int val=distance*l+startOffset;
|
||||
doc.add(new Field("value", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
|
||||
((IntTrieTokenStream)field8.tokenStreamValue()).setValue(val);
|
||||
doc.add(field8);
|
||||
((IntTrieTokenStream)field4.tokenStreamValue()).setValue(val);
|
||||
doc.add(field4);
|
||||
((IntTrieTokenStream)field2.tokenStreamValue()).setValue(val);
|
||||
doc.add(field2);
|
||||
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
||||
val=l-(noDocs/2);
|
||||
((IntTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield8);
|
||||
((IntTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield4);
|
||||
((IntTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
searcher=new IndexSearcher(directory);
|
||||
} catch (Exception e) {
|
||||
throw new Error(e);
|
||||
}
|
||||
}
|
||||
|
||||
/** test for constant score + boolean query + filter, the other tests only use the constant score mode */
|
||||
private void testRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
||||
IntTrieRangeQuery q = new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||
IntTrieRangeFilter f = new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||
int lastTerms = 0;
|
||||
for (byte i=0; i<3; i++) {
|
||||
TopDocs topDocs;
|
||||
int terms;
|
||||
String type;
|
||||
q.clearTotalNumberOfTerms();
|
||||
f.clearTotalNumberOfTerms();
|
||||
switch (i) {
|
||||
case 0:
|
||||
type = " (constant score)";
|
||||
q.setConstantScoreRewrite(true);
|
||||
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
terms = q.getTotalNumberOfTerms();
|
||||
break;
|
||||
case 1:
|
||||
type = " (boolean query)";
|
||||
q.setConstantScoreRewrite(false);
|
||||
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
terms = q.getTotalNumberOfTerms();
|
||||
break;
|
||||
case 2:
|
||||
type = " (filter)";
|
||||
topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
|
||||
terms = f.getTotalNumberOfTerms();
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count"+type, count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
if (i>0) {
|
||||
assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
|
||||
}
|
||||
lastTerms = terms;
|
||||
}
|
||||
}
|
||||
|
||||
public void testRange_8bit() throws Exception {
|
||||
testRange(8);
|
||||
}
|
||||
|
||||
public void testRange_4bit() throws Exception {
|
||||
testRange(4);
|
||||
}
|
||||
|
||||
public void testRange_2bit() throws Exception {
|
||||
testRange(2);
|
||||
}
|
||||
|
||||
public void testInverseRange() throws Exception {
|
||||
IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true);
|
||||
assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
|
||||
}
|
||||
|
||||
private void testLeftOpenRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
int upper=(count-1)*distance + (distance/3) + startOffset;
|
||||
IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true);
|
||||
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count", count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_8bit() throws Exception {
|
||||
testLeftOpenRange(8);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_4bit() throws Exception {
|
||||
testLeftOpenRange(4);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_2bit() throws Exception {
|
||||
testLeftOpenRange(2);
|
||||
}
|
||||
|
||||
private void testRightOpenRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
int lower=(count-1)*distance + (distance/3) +startOffset;
|
||||
IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true);
|
||||
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count", noDocs-count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||
}
|
||||
|
||||
public void testRightOpenRange_8bit() throws Exception {
|
||||
testRightOpenRange(8);
|
||||
}
|
||||
|
||||
public void testRightOpenRange_4bit() throws Exception {
|
||||
testRightOpenRange(4);
|
||||
}
|
||||
|
||||
public void testRightOpenRange_2bit() throws Exception {
|
||||
testRightOpenRange(2);
|
||||
}
|
||||
|
||||
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+precisionStep;
|
||||
int termCountT=0,termCountC=0;
|
||||
for (int i=0; i<50; i++) {
|
||||
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
int a=lower; lower=upper; upper=a;
|
||||
}
|
||||
// test inclusive range
|
||||
IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||
RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
|
||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test left exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
|
||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test right exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
|
||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
}
|
||||
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(8);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(4);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(2);
|
||||
}
|
||||
|
||||
private void testRangeSplit(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="ascfield"+precisionStep;
|
||||
// 50 random tests
|
||||
for (int i=0; i<50; i++) {
|
||||
int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||
int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||
if (lower>upper) {
|
||||
int a=lower; lower=upper; upper=a;
|
||||
}
|
||||
// test inclusive range
|
||||
Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
||||
// test exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
||||
// test left exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||
// test right exclusive range
|
||||
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeSplit_8bit() throws Exception {
|
||||
testRangeSplit(8);
|
||||
}
|
||||
|
||||
public void testRangeSplit_4bit() throws Exception {
|
||||
testRangeSplit(4);
|
||||
}
|
||||
|
||||
public void testRangeSplit_2bit() throws Exception {
|
||||
testRangeSplit(2);
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return documents in descending order
|
||||
for (int i=0; i<10; i++) {
|
||||
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
int a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value"));
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get("value"));
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}
|
||||
|
||||
public void testEqualsAndHash() throws Exception {
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test1", 4, new Integer(10), new Integer(20), true, true));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test2", 4, new Integer(10), new Integer(20), false, true));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test3", 4, new Integer(10), new Integer(20), true, false));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test4", 4, new Integer(10), new Integer(20), false, false));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test5", 4, new Integer(10), null, true, true));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test6", 4, null, new Integer(20), true, true));
|
||||
QueryUtils.checkHashEquals(new IntTrieRangeQuery("test7", 4, null, null, true, true));
|
||||
QueryUtils.checkEqual(
|
||||
new IntTrieRangeQuery("test8", 4, new Integer(10), new Integer(20), true, true),
|
||||
new IntTrieRangeQuery("test8", 4, new Integer(10), new Integer(20), true, true)
|
||||
);
|
||||
QueryUtils.checkUnequal(
|
||||
new IntTrieRangeQuery("test9", 4, new Integer(10), new Integer(20), true, true),
|
||||
new IntTrieRangeQuery("test9", 8, new Integer(10), new Integer(20), true, true)
|
||||
);
|
||||
QueryUtils.checkUnequal(
|
||||
new IntTrieRangeQuery("test10a", 4, new Integer(10), new Integer(20), true, true),
|
||||
new IntTrieRangeQuery("test10b", 4, new Integer(10), new Integer(20), true, true)
|
||||
);
|
||||
QueryUtils.checkUnequal(
|
||||
new IntTrieRangeQuery("test11", 4, new Integer(10), new Integer(20), true, true),
|
||||
new IntTrieRangeQuery("test11", 4, new Integer(20), new Integer(10), true, true)
|
||||
);
|
||||
QueryUtils.checkUnequal(
|
||||
new IntTrieRangeQuery("test12", 4, new Integer(10), new Integer(20), true, true),
|
||||
new IntTrieRangeQuery("test12", 4, new Integer(10), new Integer(20), false, true)
|
||||
);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
|
||||
public class TestIntTrieTokenStream extends LuceneTestCase {
|
||||
|
||||
static final int precisionStep = 8;
|
||||
static final int value = 123456;
|
||||
|
||||
public void testStreamNewAPI() throws Exception {
|
||||
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
||||
stream.setUseNewAPI(true);
|
||||
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
||||
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
|
||||
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
||||
assertTrue("New token is available", stream.incrementToken());
|
||||
assertEquals("Shift value", shift, shiftAtt.getShift());
|
||||
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term());
|
||||
}
|
||||
assertFalse("No more tokens available", stream.incrementToken());
|
||||
}
|
||||
|
||||
public void testStreamOldAPI() throws Exception {
|
||||
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
||||
stream.setUseNewAPI(false);
|
||||
Token tok=new Token();
|
||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
||||
assertNotNull("New token is available", tok=stream.next(tok));
|
||||
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term());
|
||||
}
|
||||
assertNull("No more tokens available", stream.next(tok));
|
||||
}
|
||||
|
||||
}
|
|
@ -1,402 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.RangeQuery;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestLongTrieRangeQuery extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static final long distance = 66666L;
|
||||
// shift the starting of the values to the left, to also have negative values:
|
||||
private static final long startOffset = - 1L << 31;
|
||||
// number of docs to generate for testing
|
||||
private static final int noDocs = 10000;
|
||||
|
||||
private static Field newField(String name, int precisionStep) {
|
||||
LongTrieTokenStream stream = new LongTrieTokenStream(precisionStep);
|
||||
stream.setUseNewAPI(true);
|
||||
Field f=new Field(name, stream);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
f.setOmitNorms(true);
|
||||
return f;
|
||||
}
|
||||
|
||||
private static final RAMDirectory directory;
|
||||
private static final IndexSearcher searcher;
|
||||
static {
|
||||
try {
|
||||
// set the theoretical maximum term count for 8bit (see docs for the number)
|
||||
BooleanQuery.setMaxClauseCount(7*255*2 + 255);
|
||||
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, MaxFieldLength.UNLIMITED);
|
||||
|
||||
Field
|
||||
field8 = newField("field8", 8),
|
||||
field4 = newField("field4", 4),
|
||||
field2 = newField("field2", 2),
|
||||
ascfield8 = newField("ascfield8", 8),
|
||||
ascfield4 = newField("ascfield4", 4),
|
||||
ascfield2 = newField("ascfield2", 2);
|
||||
|
||||
// Add a series of noDocs docs with increasing long values
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
Document doc=new Document();
|
||||
// add fields, that have a distance to test general functionality
|
||||
long val=distance*l+startOffset;
|
||||
doc.add(new Field("value", TrieUtils.longToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
|
||||
((LongTrieTokenStream)field8.tokenStreamValue()).setValue(val);
|
||||
doc.add(field8);
|
||||
((LongTrieTokenStream)field4.tokenStreamValue()).setValue(val);
|
||||
doc.add(field4);
|
||||
((LongTrieTokenStream)field2.tokenStreamValue()).setValue(val);
|
||||
doc.add(field2);
|
||||
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
||||
val=l-(noDocs/2);
|
||||
((LongTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield8);
|
||||
((LongTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield4);
|
||||
((LongTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val);
|
||||
doc.add(ascfield2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
searcher=new IndexSearcher(directory);
|
||||
} catch (Exception e) {
|
||||
throw new Error(e);
|
||||
}
|
||||
}
|
||||
|
||||
/** test for constant score + boolean query + filter, the other tests only use the constant score mode */
|
||||
private void testRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
||||
LongTrieRangeQuery q = new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||
LongTrieRangeFilter f = new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||
int lastTerms = 0;
|
||||
for (byte i=0; i<3; i++) {
|
||||
TopDocs topDocs;
|
||||
int terms;
|
||||
String type;
|
||||
q.clearTotalNumberOfTerms();
|
||||
f.clearTotalNumberOfTerms();
|
||||
switch (i) {
|
||||
case 0:
|
||||
type = " (constant score)";
|
||||
q.setConstantScoreRewrite(true);
|
||||
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
terms = q.getTotalNumberOfTerms();
|
||||
break;
|
||||
case 1:
|
||||
type = " (boolean query)";
|
||||
q.setConstantScoreRewrite(false);
|
||||
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
terms = q.getTotalNumberOfTerms();
|
||||
break;
|
||||
case 2:
|
||||
type = " (filter)";
|
||||
topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
|
||||
terms = f.getTotalNumberOfTerms();
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count"+type, count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
if (i>0) {
|
||||
assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
|
||||
}
|
||||
lastTerms = terms;
|
||||
}
|
||||
}
|
||||
|
||||
public void testRange_8bit() throws Exception {
|
||||
testRange(8);
|
||||
}
|
||||
|
||||
public void testRange_4bit() throws Exception {
|
||||
testRange(4);
|
||||
}
|
||||
|
||||
public void testRange_2bit() throws Exception {
|
||||
testRange(2);
|
||||
}
|
||||
|
||||
public void testInverseRange() throws Exception {
|
||||
LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true);
|
||||
assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
|
||||
}
|
||||
|
||||
private void testLeftOpenRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
long upper=(count-1)*distance + (distance/3) + startOffset;
|
||||
LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true);
|
||||
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count", count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_8bit() throws Exception {
|
||||
testLeftOpenRange(8);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_4bit() throws Exception {
|
||||
testLeftOpenRange(4);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_2bit() throws Exception {
|
||||
testLeftOpenRange(2);
|
||||
}
|
||||
|
||||
private void testRightOpenRange(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
int count=3000;
|
||||
long lower=(count-1)*distance + (distance/3) +startOffset;
|
||||
LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true);
|
||||
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score doc count", noDocs-count, sd.length );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||
}
|
||||
|
||||
public void testRightOpenRange_8bit() throws Exception {
|
||||
testRightOpenRange(8);
|
||||
}
|
||||
|
||||
public void testRightOpenRange_4bit() throws Exception {
|
||||
testRightOpenRange(4);
|
||||
}
|
||||
|
||||
public void testRightOpenRange_2bit() throws Exception {
|
||||
testRightOpenRange(2);
|
||||
}
|
||||
|
||||
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+precisionStep;
|
||||
int termCountT=0,termCountC=0;
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
// test inclusive range
|
||||
LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||
RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
|
||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test left exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
|
||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
// test right exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
|
||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||
termCountT += tq.getTotalNumberOfTerms();
|
||||
termCountC += cq.getTotalNumberOfTerms();
|
||||
}
|
||||
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(8);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(4);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(2);
|
||||
}
|
||||
|
||||
private void testRangeSplit(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="ascfield"+precisionStep;
|
||||
// 50 random tests
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||
long upper=(long)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
// test inclusive range
|
||||
Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
||||
// test exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
||||
// test left exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||
// test right exclusive range
|
||||
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeSplit_8bit() throws Exception {
|
||||
testRangeSplit(8);
|
||||
}
|
||||
|
||||
public void testRangeSplit_4bit() throws Exception {
|
||||
testRangeSplit(4);
|
||||
}
|
||||
|
||||
public void testRangeSplit_2bit() throws Exception {
|
||||
testRangeSplit(2);
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return documents in descending order
|
||||
for (int i=0; i<10; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=TrieUtils.prefixCodedToLong(searcher.doc(sd[0].doc).get("value"));
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=TrieUtils.prefixCodedToLong(searcher.doc(sd[j].doc).get("value"));
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}

  public void testEqualsAndHash() throws Exception {
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test1", 4, new Long(10L), new Long(20L), true, true));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test2", 4, new Long(10L), new Long(20L), false, true));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test3", 4, new Long(10L), new Long(20L), true, false));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test4", 4, new Long(10L), new Long(20L), false, false));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test5", 4, new Long(10L), null, true, true));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test6", 4, null, new Long(20L), true, true));
    QueryUtils.checkHashEquals(new LongTrieRangeQuery("test7", 4, null, null, true, true));
    QueryUtils.checkEqual(
      new LongTrieRangeQuery("test8", 4, new Long(10L), new Long(20L), true, true),
      new LongTrieRangeQuery("test8", 4, new Long(10L), new Long(20L), true, true)
    );
    QueryUtils.checkUnequal(
      new LongTrieRangeQuery("test9", 4, new Long(10L), new Long(20L), true, true),
      new LongTrieRangeQuery("test9", 8, new Long(10L), new Long(20L), true, true)
    );
    QueryUtils.checkUnequal(
      new LongTrieRangeQuery("test10a", 4, new Long(10L), new Long(20L), true, true),
      new LongTrieRangeQuery("test10b", 4, new Long(10L), new Long(20L), true, true)
    );
    QueryUtils.checkUnequal(
      new LongTrieRangeQuery("test11", 4, new Long(10L), new Long(20L), true, true),
      new LongTrieRangeQuery("test11", 4, new Long(20L), new Long(10L), true, true)
    );
    QueryUtils.checkUnequal(
      new LongTrieRangeQuery("test12", 4, new Long(10L), new Long(20L), true, true),
      new LongTrieRangeQuery("test12", 4, new Long(10L), new Long(20L), false, true)
    );
  }
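
  // Editorial note (not part of the original file): a minimal construction sketch using
  // only the signature exercised above. precisionStep must match the value used at index
  // time; null for min or max leaves that end of the range open, and the two booleans
  // control endpoint inclusiveness. The field name and bounds are purely illustrative.
  private static Query newIllustrativeRange() {
    return new LongTrieRangeQuery(
      "salary",          // field indexed with LongTrieTokenStream at precisionStep=4
      4,                 // precisionStep used at index time
      new Long(30000L),  // lower bound (null would mean open-ended)
      new Long(60000L),  // upper bound
      true,              // include the lower endpoint
      false);            // exclude the upper endpoint
  }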

}
@ -1,54 +0,0 @@

package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class TestLongTrieTokenStream extends LuceneTestCase {

  static final int precisionStep = 8;
  static final long value = 4573245871874382L;

  public void testStreamNewAPI() throws Exception {
    final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
    stream.setUseNewAPI(true);
    // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
    final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
    final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
    for (int shift=0; shift<64; shift+=precisionStep) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value", shift, shiftAtt.getShift());
      assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term());
    }
    assertFalse("No more tokens available", stream.incrementToken());
  }

  public void testStreamOldAPI() throws Exception {
    final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
    stream.setUseNewAPI(false);
    Token tok=new Token();
    for (int shift=0; shift<64; shift+=precisionStep) {
      assertNotNull("New token is available", tok=stream.next(tok));
      assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term());
    }
    assertNull("No more tokens available", stream.next(tok));
  }
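
  // Editorial note (not part of the original file): a minimal sketch of what the stream
  // emits, matching the loops above. One token is produced per precision level, with
  // shifts 0, precisionStep, 2*precisionStep, ... below 64, i.e. 64/precisionStep terms
  // for a long (8 terms at precisionStep=8); each term is longToPrefixCoded(value, shift).
  private static int expectedTokenCount(int precisionStep) {
    int count = 0;
    for (int shift = 0; shift < 64; shift += precisionStep) {
      count++;  // one indexed term per precision level
    }
    return count;  // e.g. 8 for precisionStep=8, 16 for 4, 32 for 2
  }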

}
@ -1,339 +0,0 @@

package org.apache.lucene.search.trie;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OpenBitSet;

import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;

public class TestTrieUtils extends LuceneTestCase {

  public void testLongConversionAndOrdering() throws Exception {
    // generate a series of encoded longs, each numerically one bigger than the one before
    String last=null;
    for (long l=-100000L; l<100000L; l++) {
      String act=TrieUtils.longToPrefixCoded(l);
      if (last!=null) {
        // test if smaller
        assertTrue("actual bigger than last", last.compareTo(act) < 0 );
      }
      // test if back-and-forth conversion works
      assertEquals("forward and back conversion should generate same long", l, TrieUtils.prefixCodedToLong(act));
      // next step
      last=act;
    }
  }

  public void testIntConversionAndOrdering() throws Exception {
    // generate a series of encoded ints, each numerically one bigger than the one before
    String last=null;
    for (int i=-100000; i<100000; i++) {
      String act=TrieUtils.intToPrefixCoded(i);
      if (last!=null) {
        // test if smaller
        assertTrue("actual bigger than last", last.compareTo(act) < 0 );
      }
      // test if back-and-forth conversion works
      assertEquals("forward and back conversion should generate same int", i, TrieUtils.prefixCodedToInt(act));
      // next step
      last=act;
    }
  }
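
  // Editorial note (not part of the original file): the ordering checked above holds
  // because the encoder flips the sign bit before encoding, so signed numeric order
  // becomes unsigned/lexicographic order (the same trick assertLongRangeSplit applies
  // below with 'min ^= 0x8000000000000000L'). A minimal sketch using TrieUtils only:
  private static void demoOrdering() {
    String negOne = TrieUtils.longToPrefixCoded(-1L);
    String zero   = TrieUtils.longToPrefixCoded(0L);
    // -1 < 0 numerically, and the encoded terms compare the same way as strings:
    System.out.println(negOne.compareTo(zero) < 0);  // prints true
  }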

  public void testLongSpecialValues() throws Exception {
    long[] vals=new long[]{
      Long.MIN_VALUE, Long.MIN_VALUE+1, Long.MIN_VALUE+2, -5003400000000L,
      -4000L, -3000L, -2000L, -1000L, -1L, 0L, 1L, 10L, 300L, 50006789999999999L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
    };
    String[] prefixVals=new String[vals.length];

    for (int i=0; i<vals.length; i++) {
      prefixVals[i]=TrieUtils.longToPrefixCoded(vals[i]);

      // check forward and back conversion
      assertEquals( "forward and back conversion should generate same long", vals[i], TrieUtils.prefixCodedToLong(prefixVals[i]) );

      // test if decoding values as int fails correctly
      try {
        TrieUtils.prefixCodedToInt(prefixVals[i]);
        fail("decoding a prefix coded long value as int should fail");
      } catch (NumberFormatException e) {
        // worked
      }
    }

    // check sort order (prefixVals should be ascending)
    for (int i=1; i<prefixVals.length; i++) {
      assertTrue( "check sort order", prefixVals[i-1].compareTo( prefixVals[i] ) < 0 );
    }

    // check the prefix encoding: at lower precision, the difference from the original
    // value should equal the removed low-order bits
    for (int i=0; i<vals.length; i++) {
      for (int j=0; j<64; j++) {
        long prefixVal=TrieUtils.prefixCodedToLong(TrieUtils.longToPrefixCoded(vals[i], j));
        long mask=(1L << j) - 1L;
        assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
      }
    }
  }
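
  // Editorial note (not part of the original file): a worked instance of the mask
  // identity asserted above. Encoding 9500 (0x251c) with shift=4 drops the low 4 bits
  // (0xc == 12), so the round-tripped value is 9488 and the difference equals 9500 & 0xf.
  // A sketch only; the values are illustrative.
  private static void demoPrecisionLoss() {
    long value = 9500L;
    int shift = 4;
    long roundTripped = TrieUtils.prefixCodedToLong(TrieUtils.longToPrefixCoded(value, shift));
    long mask = (1L << shift) - 1L;            // 0xf: the removed low-order bits
    System.out.println(value - roundTripped);  // prints 12
    System.out.println(value & mask);          // prints 12 as well
  }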

  public void testIntSpecialValues() throws Exception {
    int[] vals=new int[]{
      Integer.MIN_VALUE, Integer.MIN_VALUE+1, Integer.MIN_VALUE+2, -64765767,
      -4000, -3000, -2000, -1000, -1, 0, 1, 10, 300, 765878989, Integer.MAX_VALUE-2, Integer.MAX_VALUE-1, Integer.MAX_VALUE
    };
    String[] prefixVals=new String[vals.length];

    for (int i=0; i<vals.length; i++) {
      prefixVals[i]=TrieUtils.intToPrefixCoded(vals[i]);

      // check forward and back conversion
      assertEquals( "forward and back conversion should generate same int", vals[i], TrieUtils.prefixCodedToInt(prefixVals[i]) );

      // test if decoding values as long fails correctly
      try {
        TrieUtils.prefixCodedToLong(prefixVals[i]);
        fail("decoding a prefix coded int value as long should fail");
      } catch (NumberFormatException e) {
        // worked
      }
    }

    // check sort order (prefixVals should be ascending)
    for (int i=1; i<prefixVals.length; i++) {
      assertTrue( "check sort order", prefixVals[i-1].compareTo( prefixVals[i] ) < 0 );
    }

    // check the prefix encoding: at lower precision, the difference from the original
    // value should equal the removed low-order bits
    for (int i=0; i<vals.length; i++) {
      for (int j=0; j<32; j++) {
        int prefixVal=TrieUtils.prefixCodedToInt(TrieUtils.intToPrefixCoded(vals[i], j));
        int mask=(1 << j) - 1;
        assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
      }
    }
  }

  public void testDoubles() throws Exception {
    double[] vals=new double[]{
      Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0,
      +0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY
    };
    long[] longVals=new long[vals.length];

    // check forward and back conversion
    for (int i=0; i<vals.length; i++) {
      longVals[i]=TrieUtils.doubleToSortableLong(vals[i]);
      assertTrue( "forward and back conversion should generate same double", Double.compare(vals[i], TrieUtils.sortableLongToDouble(longVals[i]))==0 );
    }

    // check sort order (longVals should be ascending)
    for (int i=1; i<longVals.length; i++) {
      assertTrue( "check sort order", longVals[i-1] < longVals[i] );
    }
  }

  public void testFloats() throws Exception {
    float[] vals=new float[]{
      Float.NEGATIVE_INFINITY, -2.3E25f, -1.0E15f, -1.0f, -1.0E-1f, -1.0E-2f, -0.0f,
      +0.0f, 1.0E-2f, 1.0E-1f, 1.0f, 1.0E15f, 2.3E25f, Float.POSITIVE_INFINITY
    };
    int[] intVals=new int[vals.length];

    // check forward and back conversion
    for (int i=0; i<vals.length; i++) {
      intVals[i]=TrieUtils.floatToSortableInt(vals[i]);
      assertTrue( "forward and back conversion should generate same float", Float.compare(vals[i], TrieUtils.sortableIntToFloat(intVals[i]))==0 );
    }

    // check sort order (intVals should be ascending)
    for (int i=1; i<intVals.length; i++) {
      assertTrue( "check sort order", intVals[i-1] < intVals[i] );
    }
  }
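
  // Editorial note (not part of the original file): the monotonicity checked above comes
  // from the classic IEEE-754 "sortable bits" trick: take the raw bits and, for negative
  // values, flip all the non-sign bits so that more-negative numbers map to smaller longs.
  // A minimal re-derivation; this mirrors what doubleToSortableLong does conceptually,
  // but the authoritative behaviour is TrieUtils itself.
  private static long sortableBits(double d) {
    long bits = Double.doubleToLongBits(d);
    return bits < 0 ? bits ^ 0x7fffffffffffffffL : bits;  // flip payload bits of negatives
  }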

  // INFO: Tests for trieCodeLong()/trieCodeInt() are not needed, because they are implicitly tested by the range filter tests

  /** Note: The neededBounds iterator must contain unsigned values (this makes it easier to understand what's happening) */
  protected void assertLongRangeSplit(final long lower, final long upper, int precisionStep,
    final boolean useBitSet, final Iterator neededBounds
  ) throws Exception {
    final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;

    TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
      //@Override
      public void addRange(long min, long max, int shift) {
        assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
        if (useBitSet) for (long l=min; l<=max; l++) {
          assertFalse("ranges should not overlap", bits.getAndSet(l-lower) );
        }
        // make unsigned longs for easier display and understanding
        min ^= 0x8000000000000000L;
        max ^= 0x8000000000000000L;
        //System.out.println("new Long(0x"+Long.toHexString(min>>>shift)+"L),new Long(0x"+Long.toHexString(max>>>shift)+"L),");
        assertEquals( "inner min bound", ((Long)neededBounds.next()).longValue(), min>>>shift);
        assertEquals( "inner max bound", ((Long)neededBounds.next()).longValue(), max>>>shift);
      }
    }, precisionStep, lower, upper);

    if (useBitSet) {
      // after flipping all bits in the range, the cardinality should be zero
      bits.flip(0,upper-lower+1);
      assertTrue("The sub-ranges concatenated should match the whole range", bits.isEmpty());
    }
  }

  public void testSplitLongRange() throws Exception {
    // a hard-coded "standard" range
    assertLongRangeSplit(-5000L, 9500L, 4, true, Arrays.asList(new Long[]{
      new Long(0x7fffffffffffec78L),new Long(0x7fffffffffffec7fL),
      new Long(0x8000000000002510L),new Long(0x800000000000251cL),
      new Long(0x7fffffffffffec8L), new Long(0x7fffffffffffecfL),
      new Long(0x800000000000250L), new Long(0x800000000000250L),
      new Long(0x7fffffffffffedL), new Long(0x7fffffffffffefL),
      new Long(0x80000000000020L), new Long(0x80000000000024L),
      new Long(0x7ffffffffffffL), new Long(0x8000000000001L)
    }).iterator());

    // the same with no range splitting
    assertLongRangeSplit(-5000L, 9500L, 64, true, Arrays.asList(new Long[]{
      new Long(0x7fffffffffffec78L),new Long(0x800000000000251cL)
    }).iterator());

    // this tests optimized range splitting: if one of the inner bounds
    // is also a bound of the next lower precision, it should be used completely
    assertLongRangeSplit(0L, 1024L+63L, 4, true, Arrays.asList(new Long[]{
      new Long(0x800000000000040L), new Long(0x800000000000043L),
      new Long(0x80000000000000L), new Long(0x80000000000003L)
    }).iterator());

    // the full long range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-)
    assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 8, false, Arrays.asList(new Long[]{
      new Long(0x00L),new Long(0xffL)
    }).iterator());

    // the same with precisionStep=4
    assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 4, false, Arrays.asList(new Long[]{
      new Long(0x0L),new Long(0xfL)
    }).iterator());

    // the same with precisionStep=2
    assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 2, false, Arrays.asList(new Long[]{
      new Long(0x0L),new Long(0x3L)
    }).iterator());

    // the same with precisionStep=1
    assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 1, false, Arrays.asList(new Long[]{
      new Long(0x0L),new Long(0x1L)
    }).iterator());

    // an inverse range should produce no sub-ranges
    assertLongRangeSplit(9500L, -5000L, 4, false, Collections.EMPTY_LIST.iterator());

    // a 0-length range should reproduce the range itself
    assertLongRangeSplit(9500L, 9500L, 4, false, Arrays.asList(new Long[]{
      new Long(0x800000000000251cL),new Long(0x800000000000251cL)
    }).iterator());
  }
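
  // Editorial note (not part of the original file): a reading aid for the hard-coded
  // bounds above. Roughly speaking, splitLongRange peels a precisionStep-wide "rim" off
  // each end of the range at each level: the finest (shift=0) sub-ranges cover the
  // ragged edges, and progressively shorter prefixes cover the aligned middle, which is
  // why the expected bounds come in symmetric low/high pairs of decreasing length. A
  // minimal sketch that dumps the split, using only the builder overload seen above:
  private static void dumpSplit(long lower, long upper, int precisionStep) {
    TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
      public void addRange(long min, long max, int shift) {
        System.out.println("shift=" + shift + " [" + min + ".." + max + "]");
      }
    }, precisionStep, lower, upper);
  }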

  /** Note: The neededBounds iterator must contain unsigned values (this makes it easier to understand what's happening) */
  protected void assertIntRangeSplit(final int lower, final int upper, int precisionStep,
    final boolean useBitSet, final Iterator neededBounds
  ) throws Exception {
    final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;

    TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
      //@Override
      public void addRange(int min, int max, int shift) {
        assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
        if (useBitSet) for (int i=min; i<=max; i++) {
          assertFalse("ranges should not overlap", bits.getAndSet(i-lower) );
        }
        // make unsigned ints for easier display and understanding
        min ^= 0x80000000;
        max ^= 0x80000000;
        //System.out.println("new Integer(0x"+Integer.toHexString(min>>>shift)+"),new Integer(0x"+Integer.toHexString(max>>>shift)+"),");
        assertEquals( "inner min bound", ((Integer)neededBounds.next()).intValue(), min>>>shift);
        assertEquals( "inner max bound", ((Integer)neededBounds.next()).intValue(), max>>>shift);
      }
    }, precisionStep, lower, upper);

    if (useBitSet) {
      // after flipping all bits in the range, the cardinality should be zero
      bits.flip(0,upper-lower+1);
      assertTrue("The sub-ranges concatenated should match the whole range", bits.isEmpty());
    }
  }

  public void testSplitIntRange() throws Exception {
    // a hard-coded "standard" range
    assertIntRangeSplit(-5000, 9500, 4, true, Arrays.asList(new Integer[]{
      new Integer(0x7fffec78),new Integer(0x7fffec7f),
      new Integer(0x80002510),new Integer(0x8000251c),
      new Integer(0x7fffec8), new Integer(0x7fffecf),
      new Integer(0x8000250), new Integer(0x8000250),
      new Integer(0x7fffed), new Integer(0x7fffef),
      new Integer(0x800020), new Integer(0x800024),
      new Integer(0x7ffff), new Integer(0x80001)
    }).iterator());

    // the same with no range splitting
    assertIntRangeSplit(-5000, 9500, 32, true, Arrays.asList(new Integer[]{
      new Integer(0x7fffec78),new Integer(0x8000251c)
    }).iterator());

    // this tests optimized range splitting: if one of the inner bounds
    // is also a bound of the next lower precision, it should be used completely
    assertIntRangeSplit(0, 1024+63, 4, true, Arrays.asList(new Integer[]{
      new Integer(0x8000040), new Integer(0x8000043),
      new Integer(0x800000), new Integer(0x800003)
    }).iterator());

    // the full int range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-)
    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 8, false, Arrays.asList(new Integer[]{
      new Integer(0x00),new Integer(0xff)
    }).iterator());

    // the same with precisionStep=4
    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 4, false, Arrays.asList(new Integer[]{
      new Integer(0x0),new Integer(0xf)
    }).iterator());

    // the same with precisionStep=2
    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 2, false, Arrays.asList(new Integer[]{
      new Integer(0x0),new Integer(0x3)
    }).iterator());

    // the same with precisionStep=1
    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 1, false, Arrays.asList(new Integer[]{
      new Integer(0x0),new Integer(0x1)
    }).iterator());

    // an inverse range should produce no sub-ranges
    assertIntRangeSplit(9500, -5000, 4, false, Collections.EMPTY_LIST.iterator());

    // a 0-length range should reproduce the range itself
    assertIntRangeSplit(9500, 9500, 4, false, Arrays.asList(new Integer[]{
      new Integer(0x8000251c),new Integer(0x8000251c)
    }).iterator());
  }

}