mirror of https://github.com/apache/lucene.git
LUCENE-1470: New implementation using encoding of TrieUtils and TrieRangeFilter that now also supports 32bit and 64bit fields
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@744207 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
18b819abe0
commit
8b79fa51a5
|
@ -0,0 +1,160 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
|
||||
/**
 * Base class for trie-based range filters ({@code IntTrieRangeFilter},
 * {@code LongTrieRangeFilter}). Stores the per-precision field names, the
 * precision step, and the (possibly open) bounds, and provides the shared
 * term-walking logic ({@link #fillBits}) used by the concrete subclasses.
 */
abstract class AbstractTrieRangeFilter extends Filter {

  /**
   * @param fields field names by trie level (cloned defensively); {@code fields[0]}
   *   is the full-precision field used in {@link #toString}
   * @param precisionStep step that was used when indexing the values
   * @param min lower bound, or {@code null} for an open lower bound
   * @param max upper bound, or {@code null} for an open upper bound
   * @param minInclusive whether the lower bound itself is part of the range
   * @param maxInclusive whether the upper bound itself is part of the range
   */
  AbstractTrieRangeFilter(final String[] fields, final int precisionStep,
    Number min, Number max, final boolean minInclusive, final boolean maxInclusive
  ) {
    this.fields=(String[])fields.clone();
    this.precisionStep=precisionStep;
    this.min=min;
    this.max=max;
    this.minInclusive=minInclusive;
    this.maxInclusive=maxInclusive;
  }

  //@Override
  public String toString() {
    return toString(null);
  }

  /**
   * Returns a query-parser-like representation, e.g. {@code field:[1 TO 5]};
   * the field prefix is omitted when {@code field} equals the main field.
   */
  public String toString(final String field) {
    final StringBuffer sb=new StringBuffer();
    if (!this.fields[0].equals(field)) sb.append(this.fields[0]).append(':');
    return sb.append(minInclusive ? '[' : '{')
      .append((min==null) ? "*" : min.toString())
      .append(" TO ")
      .append((max==null) ? "*" : max.toString())
      .append(maxInclusive ? ']' : '}').toString();
  }

  //@Override
  public final boolean equals(final Object o) {
    if (o==this) return true;
    if (o==null) return false;
    // compare exact runtime classes so the int and long variants never compare equal
    if (this.getClass().equals(o.getClass())) {
      AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o;
      return (
        Arrays.equals(fields,q.fields) &&
        (q.min == null ? min == null : q.min.equals(min)) &&
        (q.max == null ? max == null : q.max.equals(max)) &&
        minInclusive==q.minInclusive &&
        maxInclusive==q.maxInclusive &&
        precisionStep==q.precisionStep
      );
    }
    return false;
  }

  //@Override
  public final int hashCode() {
    // XOR with distinct arbitrary constants so that swapping min/max (or the
    // two inclusive flags) produces a different hash
    int hash=Arrays.asList(fields).hashCode()+(precisionStep^0x64365465);
    if (min!=null) hash += min.hashCode()^0x14fa55fb;
    if (max!=null) hash += max.hashCode()^0x733fa5fe;
    return hash+
      (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
      (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
  }

  /**
   * Expert: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
   * This may be used for performance comparisons of different trie variants and their effectiveness.
   * This method is not thread safe, be sure to only call it when no query is running!
   * @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
   */
  public int getLastNumberOfTerms() {
    if (lastNumberOfTerms < 0) throw new IllegalStateException();
    return lastNumberOfTerms;
  }

  /** Resets the visited-term counter; called by subclasses at the start of {@code getDocIdSet}. */
  void resetLastNumberOfTerms() {
    lastNumberOfTerms=0;
  }

  /** Returns this range filter as a query.
   * Using this method, it is possible to create a Query using <code>new {Long|Int}TrieRangeFilter(....).asQuery()</code>.
   * This is a synonym for wrapping with a {@link ConstantScoreQuery},
   * but this query returns a better <code>toString()</code> variant.
   */
  public Query asQuery() {
    return new ConstantScoreQuery(this) {

      /** this instance returns a nicer String variant than the original {@link ConstantScoreQuery} */
      //@Override
      public String toString(final String field) {
        // return a more convenient representation of this query than ConstantScoreQuery does:
        return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
      }

    };
  }

  /**
   * Sets the bits of all documents that have a term in {@code field} within
   * the inclusive range [{@code lowerTerm}, {@code upperTerm}]. Both bounds
   * must be prefix-coded strings of the same length; terms of a different
   * length terminate the scan. Increments {@link #lastNumberOfTerms} once per
   * visited term (not thread safe).
   */
  void fillBits(
    final IndexReader reader,
    final OpenBitSet bits, final TermDocs termDocs,
    String field,
    final String lowerTerm, final String upperTerm
  ) throws IOException {
    final int len=lowerTerm.length();
    assert upperTerm.length()==len;
    // intern so the identity comparison with Term#field() below is valid
    field=field.intern();

    // find the docs
    final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
    try {
      do {
        final Term term = enumerator.term();
        if (term!=null && term.field()==field) {
          // break out when upperTerm reached or length of term is different
          final String t=term.text();
          if (len!=t.length() || t.compareTo(upperTerm)>0) break;
          // we have a good term, find the docs
          lastNumberOfTerms++;
          termDocs.seek(enumerator);
          while (termDocs.next()) bits.set(termDocs.doc());
        } else break;
      } while (enumerator.next());
    } finally {
      enumerator.close();
    }
  }

  // members
  final String[] fields;                       // field name per trie level; [0] = full precision
  final int precisionStep;
  final Number min,max;                        // null = open bound
  final boolean minInclusive,maxInclusive;

  // -1 marks "getDocIdSet not yet executed" for getLastNumberOfTerms()
  private int lastNumberOfTerms=-1;
}
|
|
@ -0,0 +1,140 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
|
||||
* This filter depends on a specific structure of terms in the index that can only be created
|
||||
* by {@link TrieUtils} methods.
|
||||
 * For more information about how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||
*/
|
||||
public class IntTrieRangeFilter extends AbstractTrieRangeFilter {
|
||||
|
||||
/**
|
||||
* A trie filter for matching trie coded values using the given field name and
|
||||
* the default helper field.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String[])}.
|
||||
* <p><b>This is the recommended usage of TrieUtils/IntTrieRangeFilter.</b>
|
||||
*/
|
||||
public IntTrieRangeFilter(final String field, final int precisionStep,
|
||||
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
this(
|
||||
new String[]{field, field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX},
|
||||
precisionStep,min,max,minInclusive,maxInclusive
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: A trie filter for matching trie coded values using the given field names.
|
||||
* You can specify the main and helper field name, that was used to idex the values.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String,String[])}.
|
||||
*/
|
||||
public IntTrieRangeFilter(final String field, final String lowerPrecisionField, final int precisionStep,
|
||||
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
this(new String[]{field, lowerPrecisionField},precisionStep,min,max,minInclusive,maxInclusive);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: A trie filter for matching trie coded values
|
||||
* using the given field names. If the array of field names is shorter than the
|
||||
* trieCoded one, all trieCoded values with higher index get the last field name.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String[],String[])}.
|
||||
*/
|
||||
public IntTrieRangeFilter(final String[] fields, final int precisionStep,
|
||||
Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
super(fields, precisionStep, min, max, minInclusive, maxInclusive);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
|
||||
// calculate the upper and lower bounds respecting the inclusive and null values.
|
||||
int minBound=(this.min==null) ? Integer.MIN_VALUE : (
|
||||
minInclusive ? this.min.intValue() : (this.min.intValue()+1)
|
||||
);
|
||||
int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
|
||||
maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
|
||||
);
|
||||
|
||||
resetLastNumberOfTerms();
|
||||
if (minBound > maxBound) {
|
||||
// shortcut, no docs will match this
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
} else {
|
||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
final TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
|
||||
|
||||
//@Override
|
||||
public final void addRange(String minPrefixCoded, String maxPrefixCoded, int level) {
|
||||
try {
|
||||
fillBits(
|
||||
reader, bits, termDocs,
|
||||
fields[Math.min(fields.length-1, level)],
|
||||
minPrefixCoded, maxPrefixCoded
|
||||
);
|
||||
} catch (IOException ioe) {
|
||||
// IntRangeBuilder is not allowed to throw checked exceptions:
|
||||
// wrap as RuntimeException
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
}, precisionStep, minBound, maxBound);
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
|
||||
throw e;
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
|
||||
* This filter depends on a specific structure of terms in the index that can only be created
|
||||
* by {@link TrieUtils} methods.
|
||||
 * For more information about how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||
*/
|
||||
public class LongTrieRangeFilter extends AbstractTrieRangeFilter {
|
||||
|
||||
/**
|
||||
* A trie filter for matching trie coded values using the given field name and
|
||||
* the default helper field.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String[])}.
|
||||
* <p><b>This is the recommended usage of TrieUtils/LongTrieRangeFilter.</b>
|
||||
*/
|
||||
public LongTrieRangeFilter(final String field, final int precisionStep,
|
||||
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
this(
|
||||
new String[]{field, field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX},
|
||||
precisionStep,min,max,minInclusive,maxInclusive
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: A trie filter for matching trie coded values using the given field names.
|
||||
* You can specify the main and helper field name, that was used to idex the values.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String,String[])}.
|
||||
*/
|
||||
public LongTrieRangeFilter(final String field, final String lowerPrecisionField, final int precisionStep,
|
||||
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
this(new String[]{field, lowerPrecisionField},precisionStep,min,max,minInclusive,maxInclusive);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: A trie filter for matching trie coded values
|
||||
* using the given field names. If the array of field names is shorter than the
|
||||
* trieCoded one, all trieCoded values with higher index get the last field name.
|
||||
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||
* used for indexing the values.
|
||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||
* <p>This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String[],String[])}.
|
||||
*/
|
||||
public LongTrieRangeFilter(final String[] fields, final int precisionStep,
|
||||
Long min, Long max, final boolean minInclusive, final boolean maxInclusive
|
||||
) {
|
||||
super(fields, precisionStep, min, max, minInclusive, maxInclusive);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
|
||||
// calculate the upper and lower bounds respecting the inclusive and null values.
|
||||
long minBound=(this.min==null) ? Long.MIN_VALUE : (
|
||||
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
|
||||
);
|
||||
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
|
||||
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
|
||||
);
|
||||
|
||||
resetLastNumberOfTerms();
|
||||
if (minBound > maxBound) {
|
||||
// shortcut, no docs will match this
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
} else {
|
||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
final TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
|
||||
|
||||
//@Override
|
||||
public final void addRange(String minPrefixCoded, String maxPrefixCoded, int level) {
|
||||
try {
|
||||
fillBits(
|
||||
reader, bits, termDocs,
|
||||
fields[Math.min(fields.length-1, level)],
|
||||
minPrefixCoded, maxPrefixCoded
|
||||
);
|
||||
} catch (IOException ioe) {
|
||||
// LongRangeBuilder is not allowed to throw checked exceptions:
|
||||
// wrap as RuntimeException
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
}, precisionStep, minBound, maxBound);
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
|
||||
throw e;
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,302 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
/**
|
||||
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering.
|
||||
* This filter depends on a specific structure of terms in the index that can only be created
|
||||
* by {@link TrieUtils} methods.
|
||||
* For more information, how the algorithm works, see the package description {@link org.apache.lucene.search.trie}.
|
||||
*/
|
||||
public final class TrieRangeFilter extends Filter {
|
||||
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
|
||||
* bound should be included or excluded from the range.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, String min, String max,
|
||||
final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
|
||||
) {
|
||||
if (min==null && max==null) throw new IllegalArgumentException("The min and max values cannot be both null.");
|
||||
this.trieVariant=variant;
|
||||
this.field=field.intern();
|
||||
// just for toString()
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
this.minInclusive=minInclusive;
|
||||
this.maxInclusive=maxInclusive;
|
||||
// encode bounds
|
||||
this.min=(min==null) ? trieVariant.TRIE_CODED_NUMERIC_MIN : (
|
||||
minInclusive ? min : variant.incrementTrieCoded(min)
|
||||
);
|
||||
this.max=(max==null) ? trieVariant.TRIE_CODED_NUMERIC_MAX : (
|
||||
maxInclusive ? max : variant.decrementTrieCoded(max)
|
||||
);
|
||||
// check encoded values
|
||||
if (
|
||||
this.min.length() != trieVariant.TRIE_CODED_LENGTH ||
|
||||
this.max.length() != trieVariant.TRIE_CODED_LENGTH
|
||||
) throw new NumberFormatException("Invalid trie encoded numerical value representation (incompatible length).");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie filter using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
|
||||
* bound should be included or excluded from the range.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Double min, final Double max,
|
||||
final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
|
||||
) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.doubleToTrieCoded(min.doubleValue()),
|
||||
(max==null) ? null : variant.doubleToTrieCoded(max.doubleValue()),
|
||||
minInclusive,
|
||||
maxInclusive,
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie filter using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
|
||||
* bound should be included or excluded from the range.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Date min, final Date max,
|
||||
final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
|
||||
) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.dateToTrieCoded(min),
|
||||
(max==null) ? null : variant.dateToTrieCoded(max),
|
||||
minInclusive,
|
||||
maxInclusive,
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie filter using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
|
||||
* bound should be included or excluded from the range.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Long min, final Long max,
|
||||
final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
|
||||
) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.longToTrieCoded(min.longValue()),
|
||||
(max==null) ? null : variant.longToTrieCoded(max.longValue()),
|
||||
minInclusive,
|
||||
maxInclusive,
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public String toString() {
|
||||
return toString(null);
|
||||
}
|
||||
|
||||
public String toString(final String field) {
|
||||
final StringBuffer sb=new StringBuffer();
|
||||
if (!this.field.equals(field)) sb.append(this.field).append(':');
|
||||
return sb.append(minInclusive ? '[' : '{')
|
||||
.append((minUnconverted==null) ? "*" : minUnconverted.toString())
|
||||
.append(" TO ")
|
||||
.append((maxUnconverted==null) ? "*" : maxUnconverted.toString())
|
||||
.append(maxInclusive ? ']' : '}').toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Two instances are equal if they have the same trie-encoded range bounds, same field, and same variant.
|
||||
* If one of the instances uses an exclusive lower bound, it is equal to a range with inclusive bound,
|
||||
* when the inclusive lower bound is equal to the incremented exclusive lower bound of the other one.
|
||||
* The same applys for the upper bound in other direction.
|
||||
*/
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o instanceof TrieRangeFilter) {
|
||||
TrieRangeFilter q=(TrieRangeFilter)o;
|
||||
// trieVariants are singleton per type, so no equals needed.
|
||||
return (field==q.field && min.equals(q.min) && max.equals(q.max) && trieVariant==q.trieVariant);
|
||||
} else return false;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
// the hash code uses from the variant only the number of bits, as this is unique for the variant
|
||||
return field.hashCode()+(min.hashCode()^0x14fa55fb)+(max.hashCode()^0x733fa5fe)+(trieVariant.TRIE_BITS^0x64365465);
|
||||
}
|
||||
|
||||
/** prints the String in hexadecimal \\u notation (for debugging of <code>setBits()</code>) */
|
||||
private String stringToHexDigits(final String s) {
|
||||
StringBuffer sb=new StringBuffer(s.length()*3);
|
||||
for (int i=0,c=s.length(); i<c; i++) {
|
||||
char ch=s.charAt(i);
|
||||
sb.append("\\u").append(Integer.toHexString((int)ch));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Marks documents in a specific range. Code borrowed from original RangeFilter and simplified (and returns number of terms) */
|
||||
private int setBits(final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits, String lowerTerm, String upperTerm) throws IOException {
|
||||
//System.out.println(stringToHexDigits(lowerTerm)+" TO "+stringToHexDigits(upperTerm));
|
||||
int count=0,len=lowerTerm.length();
|
||||
final String field;
|
||||
if (len<trieVariant.TRIE_CODED_LENGTH) {
|
||||
// lower precision value is in helper field
|
||||
field=(this.field + trieVariant.LOWER_PRECISION_FIELD_NAME_SUFFIX).intern();
|
||||
// add padding before lower precision values to group them
|
||||
lowerTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(lowerTerm).toString();
|
||||
upperTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(upperTerm).toString();
|
||||
// length is longer by 1 char because of padding
|
||||
len++;
|
||||
} else {
|
||||
// full precision value is in original field
|
||||
field=this.field;
|
||||
}
|
||||
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
|
||||
try {
|
||||
do {
|
||||
final Term term = enumerator.term();
|
||||
if (term!=null && term.field()==field) {
|
||||
// break out when upperTerm reached or length of term is different
|
||||
final String t=term.text();
|
||||
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
|
||||
// we have a good term, find the docs
|
||||
count++;
|
||||
termDocs.seek(enumerator);
|
||||
while (termDocs.next()) bits.set(termDocs.doc());
|
||||
} else break;
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * Splits the range recursively into sub-ranges of decreasing precision and sets
 * the bits of all documents matching each sub-range.
 * <p>The center of the range is matched with the lowest possible precision while
 * the boundaries are matched more exactly, which keeps the term count small.
 * The order of the recursive calls below is tuned to minimize TermEnum seeking.
 *
 * @param reader the index reader used to enumerate terms
 * @param termDocs reusable TermDocs instance (owned by the caller)
 * @param bits target bit set; one bit per matching document
 * @param min trie encoded lower bound (inclusive) at the current precision
 * @param lowerBoundOpen true if the range is open-ended towards the minimum
 * @param max trie encoded upper bound (inclusive) at the current precision
 * @param upperBoundOpen true if the range is open-ended towards the maximum
 * @return the number of terms visited
 */
private int splitRange(
  final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
  final String min, final boolean lowerBoundOpen, final String max, final boolean upperBoundOpen
) throws IOException {
  int count=0;
  final int length=min.length();
  // shorten the bounds by one symbol to step to the next lower precision;
  // for closed bounds, pre-increment/decrement so the shorter range stays exclusive of the edges
  final String minShort=lowerBoundOpen ? min.substring(0,length-1) : trieVariant.incrementTrieCoded(min.substring(0,length-1));
  final String maxShort=upperBoundOpen ? max.substring(0,length-1) : trieVariant.decrementTrieCoded(max.substring(0,length-1));

  if (length==1 || minShort.compareTo(maxShort)>=0) {
    // we are in the lowest precision or the current precision is not existent
    count+=setBits(reader, termDocs, bits, min, max);
  } else {
    // Avoid too much seeking: first go deeper into lower precision
    // (in IndexReader's TermEnum these terms are earlier).
    // Do this only, if the current length is not trieVariant.TRIE_CODED_LENGTH (not full precision),
    // because terms from the highest prec come before all lower prec terms
    // (because the field name is ordered before the suffixed one).
    if (length!=trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
      reader,termDocs,bits,
      minShort,lowerBoundOpen,
      maxShort,upperBoundOpen
    );
    // Avoid too much seeking: set bits for lower part of current (higher) precision.
    // These terms come later in IndexReader's TermEnum.
    if (!lowerBoundOpen) {
      count+=setBits(reader, termDocs, bits, min, trieVariant.decrementTrieCoded(minShort+trieVariant.TRIE_CODED_SYMBOL_MIN));
    }
    // Avoid too much seeking: set bits for upper part of current precision.
    // These terms come later in IndexReader's TermEnum.
    if (!upperBoundOpen) {
      count+=setBits(reader, termDocs, bits, trieVariant.incrementTrieCoded(maxShort+trieVariant.TRIE_CODED_SYMBOL_MAX), max);
    }
    // If the first step (see above) was not done (because length==trieVariant.TRIE_CODED_LENGTH) we do it now.
    if (length==trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
      reader,termDocs,bits,
      minShort,lowerBoundOpen,
      maxShort,upperBoundOpen
    );
  }
  return count;
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
if (min.compareTo(max) > 0) {
|
||||
// shortcut: if min>max, no docs will match!
|
||||
lastNumberOfTerms=0;
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
} else {
|
||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
final TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
lastNumberOfTerms=splitRange(
|
||||
reader,termDocs,bits,
|
||||
min,trieVariant.TRIE_CODED_NUMERIC_MIN.equals(min),
|
||||
max,trieVariant.TRIE_CODED_NUMERIC_MAX.equals(max)
|
||||
);
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERT: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
|
||||
* This may be used for performance comparisons of different trie variants and their effectiveness.
|
||||
* This method is not thread safe, be sure to only call it when no query is running!
|
||||
* @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
|
||||
*/
|
||||
public int getLastNumberOfTerms() {
|
||||
if (lastNumberOfTerms < 0) throw new IllegalStateException();
|
||||
return lastNumberOfTerms;
|
||||
}
|
||||
|
||||
// members
// indexed field name and the trie encoded range bounds
private final String field,min,max;
// encoding variant used to (de)code values for this filter
private final TrieUtils trieVariant;
// whether the originally supplied bounds are inclusive
private final boolean minInclusive,maxInclusive;
// original (not yet trie encoded) bound values — presumably kept for toString(); confirm against full class
private Object minUnconverted,maxUnconverted;
// number of terms visited by the last getDocIdSet() run; -1 means "not yet executed"
private int lastNumberOfTerms=-1;

}
|
|
@ -1,117 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/**
 * A Lucene {@link Query} that implements a trie-based range query.
 * This query depends on a specific structure of terms in the index that can only be created
 * by {@link TrieUtils} methods.
 * <p>This class wraps a {@link TrieRangeFilter}.
 * @see TrieRangeFilter
 */
public final class TrieRangeQuery extends ConstantScoreQuery {

  /**
   * Universal constructor (expert use only): Uses already trie-converted min/max values.
   * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
   * With <code>minInclusive</code> and <code>maxInclusive</code> it can be chosen whether the corresponding
   * bound is included or excluded from the range.
   */
  public TrieRangeQuery(final String field, final String min, final String max,
    final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
  ) {
    super(new TrieRangeFilter(field,min,max,minInclusive,maxInclusive,variant));
  }

  /**
   * A trie query using the supplied field with range bounds in numeric form (double).
   * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
   * With <code>minInclusive</code> and <code>maxInclusive</code> it can be chosen whether the corresponding
   * bound is included or excluded from the range.
   */
  public TrieRangeQuery(final String field, final Double min, final Double max,
    final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
  ) {
    super(new TrieRangeFilter(field,min,max,minInclusive,maxInclusive,variant));
  }

  /**
   * A trie query using the supplied field with range bounds in date/time form.
   * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
   * With <code>minInclusive</code> and <code>maxInclusive</code> it can be chosen whether the corresponding
   * bound is included or excluded from the range.
   */
  public TrieRangeQuery(final String field, final Date min, final Date max,
    final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
  ) {
    super(new TrieRangeFilter(field,min,max,minInclusive,maxInclusive,variant));
  }

  /**
   * A trie query using the supplied field with range bounds in integer form (long).
   * You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
   * With <code>minInclusive</code> and <code>maxInclusive</code> it can be chosen whether the corresponding
   * bound is included or excluded from the range.
   */
  public TrieRangeQuery(final String field, final Long min, final Long max,
    final boolean minInclusive, final boolean maxInclusive, final TrieUtils variant
  ) {
    super(new TrieRangeFilter(field,min,max,minInclusive,maxInclusive,variant));
  }

  /**
   * EXPERT: Return the number of terms visited during the last execution of the query.
   * This may be used for performance comparisons of different trie variants and their effectiveness.
   * When using this method be sure to query a one-segment (optimized) index to get correct results.
   * This method is not thread safe, be sure to only call it when no query is running!
   * @throws IllegalStateException if query was not yet executed.
   */
  public int getLastNumberOfTerms() {
    return ((TrieRangeFilter) filter).getLastNumberOfTerms();
  }

  //@Override
  public String toString(final String field) {
    // return a more convenient representation of this query than ConstantScoreQuery does:
    return ((TrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
  }

  /**
   * Two instances are equal if they have the same trie-encoded range bounds, same field, same boost, and same variant.
   * If one of the instances uses an exclusive lower bound, it is equal to a range with inclusive bound,
   * when the inclusive lower bound is equal to the decremented exclusive lower bound.
   * The same applies for the upper bound in the other direction.
   */
  //@Override
  public final boolean equals(final Object o) {
    if (!(o instanceof TrieRangeQuery)) return false;
    return super.equals(o);
  }

  //@Override
  public final int hashCode() {
    // make hashCode a little bit different from the wrapped ConstantScoreQuery:
    return super.hashCode()^0x1756fa55;
  }

}
|
|
@ -17,385 +17,531 @@ package org.apache.lucene.search.trie;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.ExtendedFieldCache;
|
||||
|
||||
/**
|
||||
* This is a helper class to construct the trie-based index entries for numerical values.
|
||||
* <p>For more information on how the algorithm works, see the package description {@link org.apache.lucene.search.trie}.
|
||||
* The format of how the numerical values are stored in index is documented here:
|
||||
* <p>All numerical values are first converted to special <code>unsigned long</code>s by applying some bit-wise transformations. This means:<ul>
|
||||
* <li>{@link Date}s are casted to UNIX timestamps (milliseconds since 1970-01-01, this is how Java represents date/time
|
||||
* internally): {@link Date#getTime()}. The resulting <code>signed long</code> is transformed to the unsigned form like so:</li>
|
||||
* <li><code>signed long</code>s are shifted, so that {@link Long#MIN_VALUE} is mapped to <code>0x0000000000000000</code>,
|
||||
* {@link Long#MAX_VALUE} is mapped to <code>0xffffffffffffffff</code>.</li>
|
||||
* <li><code>double</code>s are converted by getting their IEEE 754 floating-point "double format" bit layout and then some bits
|
||||
* are swapped, to be able to compare the result as <code>unsigned long</code>s.</li>
|
||||
* </ul>
|
||||
* <p>For each variant (you can choose between {@link #VARIANT_8BIT}, {@link #VARIANT_4BIT}, and {@link #VARIANT_2BIT}),
|
||||
* the bitmap of this <code>unsigned long</code> is divided into parts of a number of bits (starting with the most-significant bits)
|
||||
* and each part converted to characters between {@link #TRIE_CODED_SYMBOL_MIN} and {@link #TRIE_CODED_SYMBOL_MAX}.
|
||||
* The resulting {@link String} is comparable like the corresponding <code>unsigned long</code>.
|
||||
* <p>To store the different precisions of the long values (from one character [only the most significant one] to the full encoded length),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision == 0x20+precision</code>),
|
||||
* in an extra "helper" field with a suffixed field name (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* By this it is possible to get the correct enumeration of terms in correct precision
|
||||
* of the term list by just jumping to the correct fieldname and/or prefix. The full precision value may also be
|
||||
* stored in the document. Having the full precision value as term in a separate field with the original name,
|
||||
* sorting of query results against such fields is possible using the original field name.
|
||||
* For more information on how the algorithm works, see the
|
||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||
* <h3>The trie format using prefix encoded numerical values</h3>
|
||||
* <p>To quickly execute range queries in Apache Lucene, a range is divided recursively
|
||||
* into multiple intervals for searching: The center of the range is searched only with
|
||||
* the lowest possible precision in the trie, while the boundaries are matched
|
||||
* more exactly. This reduces the number of terms dramatically.
|
||||
 * <p>This class generates terms to achieve this: First the numerical integer values need to
|
||||
* be converted to strings. For that integer values (32 bit or 64 bit) are made unsigned
|
||||
* and the bits are converted to ASCII chars with each 7 bit. The resulting string is
|
||||
* sortable like the original integer value.
|
||||
* <p>To also index floating point numbers, this class supplies two methods to convert them
|
||||
* to integer values by changing their bit layout: {@link #doubleToSortableLong},
|
||||
* {@link #floatToSortableInt}. You will have no precision loss by
|
||||
 * converting floating point numbers to integers and back (only that the integer form
 * is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
|
||||
* date to long: {@link java.util.Date#getTime}).
|
||||
* <p>To index the different precisions of the long values each encoded value is also reduced
|
||||
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
|
||||
* <code>shift</code> value (number of bits removed) used during encoding. This series of
|
||||
* different precision values can be indexed into a Lucene {@link Document} using
|
||||
* {@link #addIndexedFields(Document,String,String[])}. The default is to index the original
|
||||
* precision in the supplied field name and the lower precisions in an additional helper field.
|
||||
* Because of this, the full-precision field can also be sorted (using {@link #getLongSortField}
|
||||
* or {@link #getIntSortField}).
|
||||
* <p>The number of bits removed from the right for each trie entry is called
|
||||
* <code>precisionStep</code> in this API. For comparing the different step values, see the
|
||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||
*/
|
||||
public final class TrieUtils {
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 8 bit.
|
||||
* This is the <b>recommended</b> one (rather fast and storage optimized) */
|
||||
public static final TrieUtils VARIANT_8BIT=new TrieUtils(8);
|
||||
private TrieUtils() {} // no instance!
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 4 bit. */
|
||||
public static final TrieUtils VARIANT_4BIT=new TrieUtils(4);
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 2 bit.
|
||||
* This may be good for some indexes, but it needs much storage space
|
||||
* and is not much faster than 8 bit in most cases. */
|
||||
public static final TrieUtils VARIANT_2BIT=new TrieUtils(2);
|
||||
|
||||
/** Marker (PADDING) before lower-precision trie entries to signal the precision value. See class description! */
|
||||
public static final char TRIE_CODED_PADDING_START=(char)0x20;
|
||||
|
||||
/** The "helper" field containing the lower precision terms is the original fieldname with this appended. */
|
||||
/**
|
||||
* The default "helper" field containing the lower precision terms is the original
|
||||
* fieldname with this appended. This suffix is used in
|
||||
* {@link #addIndexedFields(Document,String,String[])} and the corresponding c'tor
|
||||
* of <code>(Long|Int)TrieRangeFilter</code>.
|
||||
*/
|
||||
public static final String LOWER_PRECISION_FIELD_NAME_SUFFIX="#trie";
|
||||
|
||||
/** Character used as lower end */
|
||||
public static final char TRIE_CODED_SYMBOL_MIN=(char)0x100;
|
||||
/**
 * Longs are stored at lower precision by shifting off lower bits. The shift count is
 * stored as <code>SHIFT_START_LONG+shift</code> in the first character.
 */
public static final char SHIFT_START_LONG = (char)0x20;
|
||||
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
* Integers are stored at lower precision by shifting off lower bits. The shift count is
|
||||
* stored as <code>SHIFT_START_INT+shift</code> in the first character
|
||||
*/
|
||||
public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER_AUTO=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLongAuto(val);
|
||||
/**
 * Integers are stored at lower precision by shifting off lower bits. The shift count is
 * stored as <code>SHIFT_START_INT+shift</code> in the first character.
 */
public static final char SHIFT_START_INT = (char)0x60;
|
||||
|
||||
/**
 * A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as longs.
 * @see #prefixCodedToLong
 */
public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
  public final long parseLong(final String val) {
    return prefixCodedToLong(val);
  }
};
|
||||
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
* A parser instance for filling a {@link FieldCache}, that parses prefix encoded fields as ints.
|
||||
*/
|
||||
public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER_AUTO=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDoubleAuto(val);
|
||||
/**
 * A parser instance for filling a {@link FieldCache}, that parses prefix encoded fields as ints.
 * @see #prefixCodedToInt
 */
public static final FieldCache.IntParser FIELD_CACHE_INT_PARSER=new FieldCache.IntParser(){
  public final int parseInt(final String val) {
    return prefixCodedToInt(val);
  }
};
|
||||
|
||||
/**
|
||||
* Detects and returns the variant of a trie encoded string using the length.
|
||||
* @throws NumberFormatException if the length is not 8, 16, or 32 chars.
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as doubles.
|
||||
* This uses {@link #sortableLongToDouble} to convert the encoded long to a double.
|
||||
*/
|
||||
public static final TrieUtils autoDetectVariant(final String s) {
|
||||
final int l=s.length();
|
||||
if (l==VARIANT_8BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_8BIT;
|
||||
} else if (l==VARIANT_4BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_4BIT;
|
||||
} else if (l==VARIANT_2BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_2BIT;
|
||||
} else {
|
||||
throw new NumberFormatException("Invalid trie encoded numerical value representation (incompatible length).");
|
||||
/**
 * A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as doubles.
 * This uses {@link #sortableLongToDouble} to convert the encoded long to a double.
 */
public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
  public final double parseDouble(final String val) {
    return sortableLongToDouble(prefixCodedToLong(val));
  }
};
|
||||
|
||||
/**
 * A parser instance for filling a {@link FieldCache}, that parses prefix encoded fields as floats.
 * This uses {@link #sortableIntToFloat} to convert the encoded int to a float.
 */
public static final FieldCache.FloatParser FIELD_CACHE_FLOAT_PARSER=new FieldCache.FloatParser(){
  public final float parseFloat(final String val) {
    return sortableIntToFloat(prefixCodedToInt(val));
  }
};
|
||||
|
||||
/**
 * This is a convenience method, that returns prefix coded bits of a long without
 * reducing the precision. It can be used to store the full precision value as a
 * stored field in index.
 * <p>To decode, use {@link #prefixCodedToLong}.
 * @return the full precision prefix coded representation of <code>val</code>
 */
public static String longToPrefixCoded(final long val) {
  return longToPrefixCoded(val, 0);
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>long</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
* Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
|
||||
* This is method is used by {@link #trieCodeLong}.
|
||||
*/
|
||||
public static final long trieCodedToLongAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToLong(s);
|
||||
public static String longToPrefixCoded(final long val, final int shift) {
|
||||
if (shift>63 || shift<0)
|
||||
throw new IllegalArgumentException("Illegal shift value, must be 0..63");
|
||||
int nChars = (63-shift)/7 + 1;
|
||||
final char[] arr = new char[nChars+1];
|
||||
arr[0] = (char)(SHIFT_START_LONG + shift);
|
||||
long sortableBits = val ^ 0x8000000000000000L;
|
||||
sortableBits >>>= shift;
|
||||
while (nChars>=1) {
|
||||
// Store 7 bits per character for good efficiency when UTF-8 encoding.
|
||||
// The whole number is right-justified so that lucene can prefix-encode
|
||||
// the terms more efficiently.
|
||||
arr[nChars--] = (char)(sortableBits & 0x7f);
|
||||
sortableBits >>>= 7;
|
||||
}
|
||||
return new String(arr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>double</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
* This is a convenience method, that returns prefix coded bits of an int without
|
||||
* reducing the precision. It can be used to store the full precision value as a
|
||||
* stored field in index.
|
||||
* <p>To decode, use {@link #prefixCodedToInt}.
|
||||
*/
|
||||
public static final double trieCodedToDoubleAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDouble(s);
|
||||
/**
 * This is a convenience method, that returns prefix coded bits of an int without
 * reducing the precision. It can be used to store the full precision value as a
 * stored field in index.
 * <p>To decode, use {@link #prefixCodedToInt}.
 */
public static String intToPrefixCoded(final int val) {
  return intToPrefixCoded(val, 0);
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>Date</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
* Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
|
||||
* This is method is used by {@link #trieCodeInt}.
|
||||
*/
|
||||
public static final Date trieCodedToDateAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDate(s);
|
||||
public static String intToPrefixCoded(final int val, final int shift) {
|
||||
if (shift>31 || shift<0)
|
||||
throw new IllegalArgumentException("Illegal shift value, must be 0..31");
|
||||
int nChars = (31-shift)/7 + 1;
|
||||
final char[] arr = new char[nChars+1];
|
||||
arr[0] = (char)(SHIFT_START_INT + shift);
|
||||
int sortableBits = val ^ 0x80000000;
|
||||
sortableBits >>>= shift;
|
||||
while (nChars>=1) {
|
||||
// Store 7 bits per character for good efficiency when UTF-8 encoding.
|
||||
// The whole number is right-justified so that lucene can prefix-encode
|
||||
// the terms more efficiently.
|
||||
arr[nChars--] = (char)(sortableBits & 0x7f);
|
||||
sortableBits >>>= 7;
|
||||
}
|
||||
return new String(arr);
|
||||
}
|
||||
|
||||
/**
|
||||
* A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
|
||||
* automatically detecting the trie encoding variant using the String length.
|
||||
* Returns a long from prefixCoded characters.
|
||||
* Rightmost bits will be zero for lower precision codes.
|
||||
* This method can be used to decode e.g. a stored field.
|
||||
* @see #longToPrefixCoded(long)
|
||||
*/
|
||||
/**
 * A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
 * automatically detecting the trie encoding variant using the String length.
 */
public static final SortField getSortFieldAuto(final String field) {
  return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO);
}
|
||||
|
||||
/**
 * A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
 * automatically detecting the trie encoding variant using the String length.
 * @param reverse if <code>true</code>, sort in descending order
 */
public static final SortField getSortFieldAuto(final String field, boolean reverse) {
  return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO, reverse);
}
|
||||
|
||||
// TrieUtils instance's part
|
||||
|
||||
/**
 * Private constructor used only by the static VARIANT_* instances: builds one
 * encoding variant using <code>bits</code> bits per encoded character
 * (<code>bits</code> must divide 64 evenly).
 */
private TrieUtils(int bits) {
  assert 64%bits == 0;

  // helper variable for conversion: mask selecting the low-order symbol bits
  mask = (1L << bits) - 1L;

  // init global "constants" of this variant
  TRIE_BITS=bits;
  TRIE_CODED_LENGTH=64/TRIE_BITS;
  TRIE_CODED_SYMBOL_MAX=(char)(TRIE_CODED_SYMBOL_MIN+mask);
  TRIE_CODED_NUMERIC_MIN=longToTrieCoded(Long.MIN_VALUE);
  TRIE_CODED_NUMERIC_MAX=longToTrieCoded(Long.MAX_VALUE);
}
|
||||
|
||||
// internal conversion to/from strings
|
||||
|
||||
private final String internalLongToTrieCoded(long l) {
|
||||
final char[] buf=new char[TRIE_CODED_LENGTH];
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>=0; i--) {
|
||||
buf[i] = (char)( TRIE_CODED_SYMBOL_MIN + (l & mask) );
|
||||
l = l >>> TRIE_BITS;
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
private final long internalTrieCodedToLong(final String s) {
|
||||
if (s==null) throw new NullPointerException("Trie encoded string may not be NULL");
|
||||
final int len=s.length();
|
||||
if (len!=TRIE_CODED_LENGTH) throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (incompatible length, must be "+TRIE_CODED_LENGTH+")"
|
||||
);
|
||||
long l=0L;
|
||||
for (int i=0; i<len; i++) {
|
||||
char ch=s.charAt(i);
|
||||
if (ch>=TRIE_CODED_SYMBOL_MIN && ch<=TRIE_CODED_SYMBOL_MAX) {
|
||||
l = (l << TRIE_BITS) | (long)(ch-TRIE_CODED_SYMBOL_MIN);
|
||||
} else {
|
||||
public static long prefixCodedToLong(final String prefixCoded) {
|
||||
final int len = prefixCoded.length();
|
||||
final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG;
|
||||
if (shift>63 || shift<0)
|
||||
throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)");
|
||||
long sortableBits = 0L;
|
||||
for (int i=1; i<len; i++) {
|
||||
sortableBits <<= 7;
|
||||
final char ch = prefixCoded.charAt(i);
|
||||
if (ch>0x7f) {
|
||||
throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (char "+
|
||||
"Invalid prefixCoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
}
|
||||
sortableBits |= (long)(ch & 0x7f);
|
||||
}
|
||||
return l;
|
||||
return (sortableBits << shift) ^ 0x8000000000000000L;
|
||||
}
|
||||
|
||||
// Long's
|
||||
|
||||
/** Converts a <code>long</code> value to its trie encoded <code>String</code> form. */
public String longToTrieCoded(final long l) {
  // flip the sign bit so that the unsigned string order matches signed long order
  return internalLongToTrieCoded(l ^ 0x8000000000000000L);
}
|
||||
|
||||
/** Converts a trie encoded <code>String</code> value back to a <code>long</code>. */
public long trieCodedToLong(final String s) {
  // undo the sign bit flip applied in longToTrieCoded
  return internalTrieCodedToLong(s) ^ 0x8000000000000000L;
}
|
||||
|
||||
// Double's
|
||||
|
||||
/**
 * Converts a <code>double</code> value to its trie encoded <code>String</code> form.
 * Transforms the IEEE 754 bit layout so the encoded form sorts numerically.
 */
public String doubleToTrieCoded(final double d) {
  long l=Double.doubleToLongBits(d);
  if ((l & 0x8000000000000000L) == 0L) {
    // >0: set the sign bit so positive doubles sort above all negative ones
    l |= 0x8000000000000000L;
  } else {
    // <0: invert all bits so more negative doubles sort lower
    l = ~l;
  }
  return internalLongToTrieCoded(l);
}
|
||||
|
||||
/**
 * Converts a trie encoded <code>String</code> value back to a <code>double</code>.
 * Inverse of {@link #doubleToTrieCoded}.
 */
public double trieCodedToDouble(final String s) {
  long l=internalTrieCodedToLong(s);
  if ((l & 0x8000000000000000L) != 0L) {
    // >0: original was positive, clear the sign bit set during encoding
    l &= 0x7fffffffffffffffL;
  } else {
    // <0: original was negative, undo the full bit inversion
    l = ~l;
  }
  return Double.longBitsToDouble(l);
}
|
||||
|
||||
// Date's
|
||||
|
||||
/** Converts a <code>Date</code> value to its trie encoded <code>String</code> form (via {@link Date#getTime}). */
public String dateToTrieCoded(final Date d) {
  return longToTrieCoded(d.getTime());
}
|
||||
|
||||
/** Converts a trie encoded <code>String</code> value back to a <code>Date</code>. */
public Date trieCodedToDate(final String s) {
  return new Date(trieCodedToLong(s));
}
|
||||
|
||||
// increment / decrement
|
||||
|
||||
/** Increments an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String incrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean inc=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (inc) b++;
|
||||
if (inc=(b>(int)mask)) b=0;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
/** Decrements an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String decrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean dec=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (dec) b--;
|
||||
if (dec=(b<0)) b=(int)mask;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
private void addConvertedTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final String val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
Field f=new Field(fieldname, val, store, index?Field.Index.NOT_ANALYZED_NO_NORMS:Field.Index.NO);
|
||||
if (index) {
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
// add the lower precision values in the helper field with prefix
|
||||
final StringBuffer sb=new StringBuffer(TRIE_CODED_LENGTH);
|
||||
synchronized(sb) {
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>0; i--) {
|
||||
sb.setLength(0);
|
||||
f=new Field(
|
||||
fieldname + LOWER_PRECISION_FIELD_NAME_SUFFIX,
|
||||
sb.append( (char)(TRIE_CODED_PADDING_START+i) ).append( val.substring(0,i) ).toString(),
|
||||
Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS
|
||||
/**
|
||||
* Returns an int from prefixCoded characters.
|
||||
* Rightmost bits will be zero for lower precision codes.
|
||||
* This method can be used to decode e.g. a stored field.
|
||||
* @see #intToPrefixCoded(int)
|
||||
*/
|
||||
public static int prefixCodedToInt(final String prefixCoded) {
|
||||
final int len = prefixCoded.length();
|
||||
final int shift = prefixCoded.charAt(0)-SHIFT_START_INT;
|
||||
if (shift>31 || shift<0)
|
||||
throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)");
|
||||
int sortableBits = 0;
|
||||
for (int i=1; i<len; i++) {
|
||||
sortableBits <<= 7;
|
||||
final char ch = prefixCoded.charAt(i);
|
||||
if (ch>0x7f) {
|
||||
throw new NumberFormatException(
|
||||
"Invalid prefixCoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
}
|
||||
sortableBits |= (int)(ch & 0x7f);
|
||||
}
|
||||
} else {
|
||||
ldoc.add(f);
|
||||
}
|
||||
return (sortableBits << shift) ^ 0x80000000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a double value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
* Converts a <code>double</code> value to a sortable signed <code>long</code>.
|
||||
* The value is converted by getting their IEEE 754 floating-point "double format"
|
||||
* bit layout and then some bits are swapped, to be able to compare the result as long.
|
||||
* By this the precision is not reduced, but the value can easily used as a long.
|
||||
* @see #sortableLongToDouble
|
||||
*/
|
||||
public void addDoubleTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final double val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, doubleToTrieCoded(val), index, store);
|
||||
public static long doubleToSortableLong(double val) {
|
||||
long f = Double.doubleToLongBits(val);
|
||||
if (f<0) f ^= 0x7fffffffffffffffL;
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a Date value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
* Converts a sortable <code>long</code> back to a <code>double</code>.
|
||||
* @see #doubleToSortableLong
|
||||
*/
|
||||
public void addDateTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final Date val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, dateToTrieCoded(val), index, store);
|
||||
public static double sortableLongToDouble(long val) {
|
||||
if (val<0) val ^= 0x7fffffffffffffffL;
|
||||
return Double.longBitsToDouble(val);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a long value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
* Converts a <code>float</code> value to a sortable signed <code>int</code>.
|
||||
* The value is converted by getting their IEEE 754 floating-point "float format"
|
||||
* bit layout and then some bits are swapped, to be able to compare the result as int.
|
||||
* By this the precision is not reduced, but the value can easily used as an int.
|
||||
* @see #sortableIntToFloat
|
||||
*/
|
||||
public void addLongTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final long val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, longToTrieCoded(val), index, store);
|
||||
public static int floatToSortableInt(float val) {
|
||||
int f = Float.floatToIntBits(val);
|
||||
if (f<0) f ^= 0x7fffffff;
|
||||
return f;
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER);
|
||||
/**
|
||||
* Converts a sortable <code>int</code> back to a <code>float</code>.
|
||||
* @see #floatToSortableInt
|
||||
*/
|
||||
public static float sortableIntToFloat(int val) {
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return Float.intBitsToFloat(val);
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field, boolean reverse) {
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting prefix encoded long values. */
|
||||
public static SortField getLongSortField(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse);
|
||||
}
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs. */
|
||||
public final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLong(val);
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting prefix encoded int values. */
|
||||
public static SortField getIntSortField(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_INT_PARSER, reverse);
|
||||
}
|
||||
};
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles. */
|
||||
public final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDouble(val);
|
||||
/**
|
||||
* Returns a sequence of trie coded numbers suitable for {@link LongTrieRangeFilter}.
|
||||
* Each successive string in the list has had it's precision reduced by <code>precisionStep</code>.
|
||||
* For sorting, index the first full-precision value into a separate field and the
|
||||
* remaining values into another field.
|
||||
* <p>To achieve this, use {@link #addIndexedFields(Document,String,String[])}.
|
||||
*/
|
||||
public static String[] trieCodeLong(long val, int precisionStep) {
|
||||
if (precisionStep<1 || precisionStep>64)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
||||
String[] arr = new String[63/precisionStep+1];
|
||||
int idx = 0;
|
||||
for (int shift=0; shift<64; shift+=precisionStep) {
|
||||
arr[idx++] = longToPrefixCoded(val, shift);
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
|
||||
private final long mask;
|
||||
/**
|
||||
* Returns a sequence of trie coded numbers suitable for {@link IntTrieRangeFilter}.
|
||||
* Each successive string in the list has had it's precision reduced by <code>precisionStep</code>.
|
||||
* For sorting, index the first full-precision value into a separate field and the
|
||||
* remaining values into another field.
|
||||
* <p>To achieve this, use {@link #addIndexedFields(Document,String,String[])}.
|
||||
*/
|
||||
public static String[] trieCodeInt(int val, int precisionStep) {
|
||||
if (precisionStep<1 || precisionStep>32)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
||||
String[] arr = new String[31/precisionStep+1];
|
||||
int idx = 0;
|
||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
||||
arr[idx++] = intToPrefixCoded(val, shift);
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
/** Number of bits used in this trie variant (2, 4, or 8) */
|
||||
public final int TRIE_BITS;
|
||||
/**
|
||||
* Indexes the full precision value only in the main field (for sorting), and indexes all other
|
||||
* lower precision values in <code>field+LOWER_PRECISION_FIELD_NAME_SUFFIX</code>.
|
||||
* <p><b>This is the recommended variant to add trie fields to the index.</b>
|
||||
* By this it is possible to sort the field using a <code>SortField</code> instance
|
||||
* returned by {@link #getLongSortField} or {@link #getIntSortField}.
|
||||
* <p>This method does not store the fields and saves no term frequency or norms
|
||||
* (which are normally not needed for trie fields). If you want to additionally store
|
||||
* the value, you can use the normal methods of {@link Document} to achive this, just specify
|
||||
* <code>Field.Store.YES</code>, <code>Field.Index.NO</code> and the same field name.
|
||||
* <p>Examples:
|
||||
* <pre>
|
||||
* addIndexedFields(doc, "mydouble", trieCodeLong(doubleToSortableLong(1.414d), 4));
|
||||
* addIndexedFields(doc, "mylong", trieCodeLong(123456L, 4));
|
||||
* </pre>
|
||||
**/
|
||||
public static void addIndexedFields(Document doc, String field, String[] trieCoded) {
|
||||
addIndexedFields(doc, new String[]{field, field+LOWER_PRECISION_FIELD_NAME_SUFFIX}, trieCoded);
|
||||
}
|
||||
|
||||
/** Length (in chars) of an encoded value (8, 16, or 32 chars) */
|
||||
public final int TRIE_CODED_LENGTH;
|
||||
/**
|
||||
* Expert: Indexes the full precision value only in the main field (for sorting), and indexes all other
|
||||
* lower precision values in the <code>lowerPrecision</code> field.
|
||||
* If you do not specify the same field name for the main and lower precision one,
|
||||
* it is possible to sort the field using a <code>SortField</code> instance
|
||||
* returned by {@link #getLongSortField} or {@link #getIntSortField}.
|
||||
* <p>This method does not store the fields and saves no term frequency or norms
|
||||
* (which are normally not needed for trie fields). If you want to additionally store
|
||||
* the value, you can use the normal methods of {@link Document} to achive this, just specify
|
||||
* <code>Field.Store.YES</code>, <code>Field.Index.NO</code> and the same main field name.
|
||||
* <p>Examples:
|
||||
* <pre>
|
||||
* addIndexedFields(doc, "mydouble", "mydoubletrie", trieCodeLong(doubleToSortableLong(1.414d), 4));
|
||||
* addIndexedFields(doc, "mylong", "mylongtrie", trieCodeLong(123456L, 4));
|
||||
* </pre>
|
||||
* @see #addIndexedFields(Document,String,String[])
|
||||
**/
|
||||
public static void addIndexedFields(Document doc, String field, String lowerPrecisionField, String[] trieCoded) {
|
||||
addIndexedFields(doc, new String[]{field, lowerPrecisionField}, trieCoded);
|
||||
}
|
||||
|
||||
/** Character used as upper end (depends on trie bits, its <code>{@link #TRIE_CODED_SYMBOL_MIN}+2^{@link #TRIE_BITS}-1</code>) */
|
||||
public final char TRIE_CODED_SYMBOL_MAX;
|
||||
/**
|
||||
* Expert: Indexes a series of trie coded values into a lucene {@link Document}
|
||||
* using the given field names.
|
||||
* If the array of field names is shorter than the trie coded one, all trie coded
|
||||
* values with higher index get the last field name.
|
||||
* <p>This method does not store the fields and saves no term frequency or norms
|
||||
* (which are normally not needed for trie fields). If you want to additionally store
|
||||
* the value, you can use the normal methods of {@link Document} to achive this, just specify
|
||||
* <code>Field.Store.YES</code>, <code>Field.Index.NO</code> and the same main field name.
|
||||
**/
|
||||
public static void addIndexedFields(Document doc, String[] fields, String[] trieCoded) {
|
||||
for (int i=0; i<trieCoded.length; i++) {
|
||||
final int fnum = Math.min(fields.length-1, i);
|
||||
final Field f = new Field(fields[fnum], trieCoded[i], Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
f.setOmitTf(true);
|
||||
doc.add(f);
|
||||
}
|
||||
}
|
||||
|
||||
/** minimum encoded value of a numerical index entry: {@link Long#MIN_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MIN;
|
||||
/**
|
||||
* Expert: Splits a long range recursively.
|
||||
* You may implement a builder that adds clauses to a
|
||||
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
|
||||
* {@link IntRangeBuilder#addRange(String,String,int)}
|
||||
* method.
|
||||
* <p>This method is used by {@link LongTrieRangeFilter}.
|
||||
*/
|
||||
public static void splitLongRange(final LongRangeBuilder builder,
|
||||
final int precisionStep, final long minBound, final long maxBound
|
||||
) {
|
||||
if (precisionStep<1 || precisionStep>64)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
||||
splitRange(
|
||||
builder, 64, precisionStep, minBound, maxBound,
|
||||
0 /* start with no shift */
|
||||
);
|
||||
}
|
||||
|
||||
/** maximum encoded value of a numerical index entry: {@link Long#MAX_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MAX;
|
||||
/**
|
||||
* Expert: Splits an int range recursively.
|
||||
* You may implement a builder that adds clauses to a
|
||||
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
|
||||
* {@link IntRangeBuilder#addRange(String,String,int)}
|
||||
* method.
|
||||
* <p>This method is used by {@link IntTrieRangeFilter}.
|
||||
*/
|
||||
public static void splitIntRange(final IntRangeBuilder builder,
|
||||
final int precisionStep, final int minBound, final int maxBound
|
||||
) {
|
||||
if (precisionStep<1 || precisionStep>32)
|
||||
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
||||
splitRange(
|
||||
builder, 32, precisionStep, (long)minBound, (long)maxBound,
|
||||
0 /* start with no shift */
|
||||
);
|
||||
}
|
||||
|
||||
/** This helper does the splitting for both 32 and 64 bit. */
|
||||
private static void splitRange(
|
||||
final Object builder, final int valSize,
|
||||
final int precisionStep, final long minBound, final long maxBound,
|
||||
final int shift
|
||||
) {
|
||||
// calculate new bounds for inner precision
|
||||
final long diff = 1L << (shift+precisionStep),
|
||||
mask = ((1L<<precisionStep) - 1L) << shift;
|
||||
final boolean
|
||||
hasLower = (minBound & mask) != 0L,
|
||||
hasUpper = (maxBound & mask) != mask;
|
||||
final long
|
||||
nextMinBound = (hasLower ? (minBound + diff) : minBound) & ~mask,
|
||||
nextMaxBound = (hasUpper ? (maxBound - diff) : maxBound) & ~mask;
|
||||
|
||||
if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound) {
|
||||
// We are in the lowest precision or the next precision is not available.
|
||||
addRange(builder, valSize, precisionStep, minBound, maxBound, shift);
|
||||
} else {
|
||||
if (hasLower)
|
||||
addRange(builder, valSize, precisionStep, minBound, minBound | mask, shift);
|
||||
if (hasUpper)
|
||||
addRange(builder, valSize, precisionStep, maxBound & ~mask, maxBound, shift);
|
||||
// recurse down to next precision
|
||||
splitRange(
|
||||
builder, valSize, precisionStep,
|
||||
nextMinBound, nextMaxBound,
|
||||
shift+precisionStep
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/** Helper that delegates to correct range builder */
|
||||
private static void addRange(
|
||||
final Object builder, final int valSize,
|
||||
final int precisionStep, long minBound, long maxBound,
|
||||
final int shift
|
||||
) {
|
||||
// for the max bound set all lower bits (that were shifted away):
|
||||
// this is important for testing or other usages of the splitted range
|
||||
// (e.g. to reconstruct the full range). The prefixEncoding will remove
|
||||
// the bits anyway, so they do not hurt!
|
||||
maxBound |= (1L << shift) - 1L;
|
||||
// delegate to correct range builder
|
||||
switch(valSize) {
|
||||
case 64:
|
||||
((LongRangeBuilder)builder).addRange(precisionStep, minBound, maxBound, shift);
|
||||
break;
|
||||
case 32:
|
||||
((IntRangeBuilder)builder).addRange(precisionStep, (int)minBound, (int)maxBound, shift);
|
||||
break;
|
||||
default:
|
||||
// Should not happen!
|
||||
throw new IllegalArgumentException("valSize must be 32 or 64.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Callback for {@link #splitLongRange}.
|
||||
* You need to overwrite only one of the methods.
|
||||
*/
|
||||
public static abstract class LongRangeBuilder {
|
||||
|
||||
/**
|
||||
* Overwrite this method, if you like to receive the already prefix encoded range bounds.
|
||||
* You can directly build classical range queries from them.
|
||||
* The level gives the precision level (0 = highest precision) of the encoded values.
|
||||
* This parameter could be used as an index to an array of fieldnames like the
|
||||
* parameters to {@link #addIndexedFields(Document,String[],String[])} for specifying
|
||||
* the field names for each precision:
|
||||
* <pre>
|
||||
* String field = fields[Math.min(fields.length-1, level)];
|
||||
* </pre>
|
||||
*/
|
||||
public void addRange(String minPrefixCoded, String maxPrefixCoded, int level) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Overwrite this method, if you like to receive the raw long range bounds.
|
||||
* You can use this for e.g. debugging purposes (print out range bounds).
|
||||
*/
|
||||
public void addRange(final int precisionStep, final long min, final long max, final int shift) {
|
||||
/*System.out.println(Long.toHexString((min^0x8000000000000000L) >>> shift)+".."+
|
||||
Long.toHexString((max^0x8000000000000000L) >>> shift));*/
|
||||
addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift), shift/precisionStep);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Callback for {@link #splitIntRange}.
|
||||
* You need to overwrite only one of the methods.
|
||||
*/
|
||||
public static abstract class IntRangeBuilder {
|
||||
|
||||
/**
|
||||
* Overwrite this method, if you like to receive the already prefix encoded range bounds.
|
||||
* You can directly build classical range queries from them.
|
||||
* The level gives the precision level (0 = highest precision) of the encoded values.
|
||||
* This parameter could be used as an index to an array of fieldnames like the
|
||||
* parameters to {@link #addIndexedFields(Document,String[],String[])} for specifying
|
||||
* the field names for each precision:
|
||||
* <pre>
|
||||
* String field = fields[Math.min(fields.length-1, level)];
|
||||
* </pre>
|
||||
*/
|
||||
public void addRange(String minPrefixCoded, String maxPrefixCoded, int level) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Overwrite this method, if you like to receive the raw int range bounds.
|
||||
* You can use this for e.g. debugging purposes (print out range bounds).
|
||||
*/
|
||||
public void addRange(final int precisionStep, final int min, final int max, final int shift) {
|
||||
/*System.out.println(Integer.toHexString((min^0x80000000) >>> shift)+".."+
|
||||
Integer.toHexString((max^0x80000000) >>> shift));*/
|
||||
addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift), shift/precisionStep);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<html>
|
||||
<body>
|
||||
<p>This package provides fast numeric range queries/filters on <code>long</code>, <code>double</code> or <code>Date</code>
|
||||
fields based on trie structures.</p>
|
||||
<p>This package provides fast numeric range queries/filters on <code>long</code>, <code>double</code>, <code>int</code>,
|
||||
or <code>float</code> (and other data types, that can be converted to numerical values) fields based on trie structures.</p>
|
||||
|
||||
<h3>How it works</h3>
|
||||
<p>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>, where this algorithm was described:
|
||||
|
@ -14,66 +14,96 @@ Computers & Geosciences 34 (12), 1947-1955.
|
|||
it cannot handle numerical ranges (e.g., field value is inside user defined bounds, even dates are numerical values).
|
||||
We have developed an extension to Apache Lucene that stores
|
||||
the numerical values in a special string-encoded format with variable precision
|
||||
(all numerical values like doubles, longs, and timestamps are converted to lexicographic sortable string representations
|
||||
and stored with different precisions from one byte to the full 8 bytes - depending on the variant used).
|
||||
For a more detailed description of how the values are stored, see {@link org.apache.lucene.search.trie.TrieUtils}.
|
||||
A range is then divided recursively into multiple intervals for searching:
|
||||
The center of the range is searched only with the lowest possible precision in the trie, while the boundaries are matched
|
||||
more exactly. This reduces the number of terms dramatically.</p>
|
||||
(all numerical values like doubles, longs, floats, and ints are converted to lexicographic sortable string representations
|
||||
and stored with different precisions. For a more detailed description of how the values are stored,
|
||||
see {@link org.apache.lucene.search.trie.TrieUtils}. A range is then divided recursively into
|
||||
multiple intervals for searching:
|
||||
The center of the range is searched only with the lowest possible precision in the trie,
|
||||
while the boundaries are matched more exactly. This reduces the number of terms dramatically.</p>
|
||||
|
||||
<p>For the variant that uses a lowest precision of 1-byte the index
|
||||
contains only a maximum of 256 distinct values in the lowest precision.
|
||||
Overall, a range could consist of a theoretical maximum of
|
||||
<p>For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that
|
||||
uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the
|
||||
lowest precision. Overall, a range could consist of a theoretical maximum of
|
||||
<code>7*255*2 + 255 = 3825</code> distinct terms (when there is a term for every distinct value of an
|
||||
8-byte-number in the index and the range covers all of them; a maximum of 255 distinct values is used
|
||||
8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used
|
||||
because it would always be possible to reduce the full 256 values to one term with degraded precision).
|
||||
In practise, we have seen up to 300 terms in most cases (index with 500,000 metadata records
|
||||
and a uniform value distribution).</p>
|
||||
|
||||
<p>There are two other variants of encoding: 4bit and 2bit. Each variant stores more different precisions
|
||||
of the longs and thus needs more storage space (because it generates more and longer terms -
|
||||
4bit: two times the length and number of terms; 2bit: four times the length and number of terms).
|
||||
But on the other hand, the maximum number of distinct terms used for range queries is
|
||||
<code>15*15*2 + 15 = 465</code> for the 4bit variant, and
|
||||
<code>31*3*2 + 3 = 189</code> for the 2bit variant.</p>
|
||||
<p>You can choose any <code>precisionStep</code> when encoding integer values.
|
||||
Lower step values mean more precisions and so more terms in index (and index gets larger).
|
||||
On the other hand, the maximum number of terms to match reduces, which optimized query speed.
|
||||
The formula to calculate the maximum term count is:
|
||||
<pre>
|
||||
n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
|
||||
</pre>
|
||||
<p><em>(this formula is only correct, when <code>bitsPerValue/precisionStep</code> is an integer;
|
||||
in other cases, the value must be rounded up and the last summand must contain the modulo of the division as
|
||||
precision step)</em>.
|
||||
For longs stored using a precision step of 4, <code>n = 15*15*2 + 15 = 465</code>, and for a precision
|
||||
step of 2, <code>n = 31*3*2 + 3 = 189</code>. But the faster search speed is reduced by more seeking
|
||||
in the term enum of the index. Because of this, the ideal <code>precisionStep</code> value can only
|
||||
be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
|
||||
using a multiple of the original step value.</p>
|
||||
|
||||
<p>This dramatically improves the performance of Apache Lucene with range queries, which
|
||||
are no longer dependent on the index size and the number of distinct values because there is
|
||||
an upper limit unrelated to either of these properties.</p>
|
||||
|
||||
<h3>Usage</h3>
|
||||
<p>To use the new query types the numerical values, which may be <code>long</code>, <code>double</code> or <code>Date</code>,
|
||||
the values must be stored during indexing in a special format in the index (using {@link org.apache.lucene.search.trie.TrieUtils}).
|
||||
This can be done like this:</p>
|
||||
<p>To use the new query types the numerical values, which may be<code>long</code>, <code>double</code>, <code>int</code>,
|
||||
<code>float</code>, or <code>Date</code>, the values must be indexed in a special prefix encoded format
|
||||
(using {@link org.apache.lucene.search.trie.TrieUtils}). This can be done like this:</p>
|
||||
|
||||
<pre>
|
||||
<em>// chose a step value, 8 is a general good value for large indexes:</em>
|
||||
int precisionStep = 8;
|
||||
|
||||
Document doc = new Document();
|
||||
// add some standard fields:
|
||||
|
||||
<em>// add some standard fields:</em>
|
||||
String svalue = "anything to index";
|
||||
doc.add(new Field("exampleString", svalue, Field.Store.YES, Field.Index.ANALYZED) ;
|
||||
// add some numerical fields:
|
||||
double fvalue = 1.057E17;
|
||||
TrieUtils.VARIANT_8BIT.addDoubleTrieCodedDocumentField(doc, "exampleDouble", fvalue, true /* index the field */, Field.Store.YES);
|
||||
doc.add(new Field("exampleString", svalue, Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
||||
<em>// add some numerical fields:</em>
|
||||
long lvalue = 121345L;
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(doc, "exampleLong", lvalue, true /* index the field */, Field.Store.YES);
|
||||
Date dvalue = new Date(); // actual time
|
||||
TrieUtils.VARIANT_8BIT.addDateTrieCodedDocumentField(doc, "exampleDate", dvalue, true /* index the field */, Field.Store.YES);
|
||||
// add document to IndexWriter
|
||||
TrieUtils.addIndexedFields(doc, "exampleLong", TrieUtils.trieCodeLong(lvalue, precisionStep));
|
||||
double dvalue = 1.057E17;
|
||||
TrieUtils.addIndexedFields(doc, "exampleDouble", TrieUtils.trieCodeLong(TrieUtils.doubleToSortableLong(dvalue), precisionStep));
|
||||
int ivalue = 121345;
|
||||
TrieUtils.addIndexedFields(doc, "exampleInt", TrieUtils.trieCodeInt(ivalue, precisionStep));
|
||||
float fvalue = 1.057E17f;
|
||||
TrieUtils.addIndexedFields(doc, "exampleFloat", TrieUtils.trieCodeInt(TrieUtils.floatToSortableInt(fvalue), precisionStep));
|
||||
Date datevalue = new Date(); <em>// actual time</em>
|
||||
TrieUtils.addIndexedFields(doc, "exampleDate", TrieUtils.trieCodeLong(datevalue.getTime(), precisionStep));
|
||||
|
||||
<em>// if you want to also store one of the values:</em>
|
||||
doc.add(new Field("exampleLong", Long.toString(lvalue), Field.Store.YES, Field.Index.NO));
|
||||
|
||||
<em>// or as encoded value:</em>
|
||||
doc.add(new Field("exampleLong2", TrieUtils.longToPrefixCoded(lvalue), Field.Store.YES, Field.Index.NO));
|
||||
|
||||
<em>// now add document to IndexWriter, as usual</em>
|
||||
</pre>
|
||||
|
||||
<p>The numeric index fields you prepared in this way can be searched by {@link org.apache.lucene.search.trie.TrieRangeQuery}:</p>
|
||||
<p>The numeric index fields you prepared in this way can be searched by
|
||||
{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:</p>
|
||||
|
||||
<pre>
|
||||
// Java 1.4, because Double.valueOf(double) is not available:
|
||||
Query q = new TrieRangeQuery("exampleDouble", new Double(1.0E17), new Double(2.0E17), TrieUtils.VARIANT_8BIT);
|
||||
// OR, Java 1.5, using autoboxing:
|
||||
Query q = new TrieRangeQuery("exampleDouble", 1.0E17, 2.0E17, TrieUtils.VARIANT_8BIT);
|
||||
<em>// Java 1.4, because Long.valueOf(long) is not available:</em>
|
||||
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery();
|
||||
|
||||
<em>// OR, Java 1.5, using autoboxing:</em>
|
||||
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery();
|
||||
|
||||
<em>// execute the search, as usual:</em>
|
||||
TopDocs docs = searcher.search(q, 10);
|
||||
for (int i = 0; i<docs.scoreDocs.length; i++) {
|
||||
Document doc = searcher.doc(docs.scoreDocs[i].doc);
|
||||
System.out.println(doc.get("exampleString"));
|
||||
// decode the stored numerical value (important!!!):
|
||||
System.out.println(TrieUtils.VARIANT_8BIT.trieCodedToDouble(doc.get("exampleDouble")));
|
||||
|
||||
<em>// decode a prefix coded, stored field:</em>
|
||||
System.out.println(TrieUtils.prefixCodedToLong(doc.get("exampleLong2")));
|
||||
}
|
||||
</pre>
|
||||
|
||||
|
@ -82,9 +112,9 @@ This can be done like this:</p>
|
|||
<p>Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed
|
||||
that the old {@link org.apache.lucene.search.RangeQuery} (with raised
|
||||
{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
|
||||
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and
|
||||
{@link org.apache.lucene.search.trie.TrieRangeQuery} took <100ms to
|
||||
complete (on an Opteron64 machine, Java 1.5, {@link org.apache.lucene.search.trie.TrieUtils#VARIANT_8BIT}).
|
||||
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
|
||||
{@link org.apache.lucene.search.trie.LongTrieRangeFilter}<code>.asQuery()</code> took <100ms to
|
||||
complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
|
||||
This query type was developed for a geographic portal, where the performance for
|
||||
e.g. bounding boxes or exact date/time stamps is important.</p>
|
||||
|
||||
|
|
|
@ -0,0 +1,291 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.RangeQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
 * Tests {@link IntTrieRangeFilter} over a fixed index of trie-coded int values:
 * bounded and half-open ranges, random cross-checks against classic
 * {@link RangeQuery}, exact range-split counting on densely ascending fields,
 * and reverse sorting via the trie sort field.
 * Each check is run for precision steps 8, 4 and 2 bits.
 */
public class TestIntTrieRangeFilter extends LuceneTestCase
{
  // distance of entries
  private static final int distance = 6666;
  // shift the starting of the values to the left, to also have negative values:
  // (unary minus binds tighter than <<, so this evaluates as (-1) << 15 == -32768)
  private static final int startOffset = - 1 << 15;
  // number of docs to generate for testing
  private static final int noDocs = 10000;
  
  // index and searcher are built once for all test methods in the static initializer
  private static final RAMDirectory directory;
  private static final IndexSearcher searcher;
  static {
    try {
      directory = new RAMDirectory();
      IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
      true, MaxFieldLength.UNLIMITED);
      
      // Add a series of noDocs docs with increasing int values
      for (int l=0; l<noDocs; l++) {
        Document doc=new Document();
        // add fields, that have a distance to test general functionality
        final int val=distance*l+startOffset;
        TrieUtils.addIndexedFields(doc,"field8", TrieUtils.trieCodeInt(val, 8));
        // additionally store the plain prefix-coded value (not indexed), so the
        // assertions below can decode the original int back from the hit documents
        doc.add(new Field("field8", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        TrieUtils.addIndexedFields(doc,"field4", TrieUtils.trieCodeInt(val, 4));
        doc.add(new Field("field4", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        TrieUtils.addIndexedFields(doc,"field2", TrieUtils.trieCodeInt(val, 2));
        doc.add(new Field("field2", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
        TrieUtils.addIndexedFields(doc,"ascfield8", TrieUtils.trieCodeInt(l-(noDocs/2), 8));
        TrieUtils.addIndexedFields(doc,"ascfield4", TrieUtils.trieCodeInt(l-(noDocs/2), 4));
        TrieUtils.addIndexedFields(doc,"ascfield2", TrieUtils.trieCodeInt(l-(noDocs/2), 2));
        writer.addDocument(doc);
      }
      
      writer.optimize();
      writer.close();
      searcher=new IndexSearcher(directory);
    } catch (Exception e) {
      // a static initializer cannot throw checked exceptions; wrap and rethrow
      throw new Error(e);
    }
  }
  
  /**
   * Runs an inclusive bounded range over "field&lt;precisionStep&gt;" and checks
   * hit count plus decoded first/last values against the known doc spacing.
   */
  private void testRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    // lower lies between doc values 1 and 2; upper between values (count+1) and (count+2)
    int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
    IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
  }
  
  public void testRange_8bit() throws Exception {
    testRange(8);
  }
  
  public void testRange_4bit() throws Exception {
    testRange(4);
  }
  
  public void testRange_2bit() throws Exception {
    testRange(2);
  }
  
  /** Range with no lower bound (null): must match from the very first doc. */
  private void testLeftOpenRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    int upper=(count-1)*distance + (distance/3) + startOffset;
    IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, null, new Integer(upper), true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
  }
  
  public void testLeftOpenRange_8bit() throws Exception {
    testLeftOpenRange(8);
  }
  
  public void testLeftOpenRange_4bit() throws Exception {
    testLeftOpenRange(4);
  }
  
  public void testLeftOpenRange_2bit() throws Exception {
    testLeftOpenRange(2);
  }
  
  /** Range with no upper bound (null): must match through the very last doc. */
  private void testRightOpenRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    int lower=(count-1)*distance + (distance/3) +startOffset;
    IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), null, true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", noDocs-count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get(field)) );
  }
  
  public void testRightOpenRange_8bit() throws Exception {
    testRightOpenRange(8);
  }
  
  public void testRightOpenRange_4bit() throws Exception {
    testRightOpenRange(4);
  }
  
  public void testRightOpenRange_2bit() throws Exception {
    testRightOpenRange(2);
  }
  
  /**
   * Cross-checks the trie filter against a constant-score classic RangeQuery on
   * the same prefix-coded terms; both must return identical hit counts for all
   * four inclusive/exclusive bound combinations.
   */
  private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="field"+precisionStep;
    // 50 random tests, the tests may also return 0 results, if min>max, but this is ok
    for (int i=0; i<50; i++) {
      int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
      int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
      // test inclusive range
      Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery();
      RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
      cq.setConstantScoreRewrite(true);
      TopDocs tTopDocs = searcher.search(tq, 1);
      TopDocs cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery();
      cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test left exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery();
      cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test right exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false).asQuery();
      cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
    }
  }
  
  public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(8);
  }
  
  public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(4);
  }
  
  public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(2);
  }
  
  /**
   * On the dense "ascfield" (distance 1) the exact hit count of every
   * inclusive/exclusive combination is known in closed form; verifies the
   * range-splitting logic produces precisely those counts.
   */
  private void testRangeSplit(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="ascfield"+precisionStep;
    // 50 random tests
    for (int i=0; i<50; i++) {
      int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2);
      int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2);
      if (lower>upper) {
        // swap so that lower <= upper
        int a=lower; lower=upper; upper=a;
      }
      // test inclusive range
      Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery();
      TopDocs tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
      // test exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
      // test left exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
      // test right exclusive range
      tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
    }
  }
  
  public void testRangeSplit_8bit() throws Exception {
    testRangeSplit(8);
  }
  
  public void testRangeSplit_4bit() throws Exception {
    testRangeSplit(4);
  }
  
  public void testRangeSplit_2bit() throws Exception {
    testRangeSplit(2);
  }
  
  /**
   * Verifies the trie int sort field: since the index order is ascending, a
   * reverse sort must yield strictly descending decoded values.
   */
  private void testSorting(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="field"+precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should return descending documents
    for (int i=0; i<10; i++) {
      int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
      int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
      if (lower>upper) {
        // swap so that lower <= upper
        int a=lower; lower=upper; upper=a;
      }
      Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery();
      TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true)));
      if (topDocs.totalHits==0) continue;  // empty result: nothing to compare
      ScoreDoc[] sd = topDocs.scoreDocs;
      assertNotNull(sd);
      int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get(field));
      for (int j=1; j<sd.length; j++) {
        int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get(field));
        assertTrue("Docs should be sorted backwards", last>act );
        last=act;
      }
    }
  }
  
  public void testSorting_8bit() throws Exception {
    testSorting(8);
  }
  
  public void testSorting_4bit() throws Exception {
    testSorting(4);
  }
  
  public void testSorting_2bit() throws Exception {
    testSorting(2);
  }
  
}
|
|
@ -0,0 +1,291 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.RangeQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
 * Tests {@link LongTrieRangeFilter} over a fixed index of trie-coded long
 * values: bounded and half-open ranges, random cross-checks against classic
 * {@link RangeQuery}, exact range-split counting on densely ascending fields,
 * and reverse sorting via the trie sort field.
 * Structurally parallel to TestIntTrieRangeFilter, but with 64-bit values.
 * Each check is run for precision steps 8, 4 and 2 bits.
 */
public class TestLongTrieRangeFilter extends LuceneTestCase
{
  // distance of entries
  private static final long distance = 66666L;
  // shift the starting of the values to the left, to also have negative values:
  // (unary minus binds tighter than <<, so this evaluates as (-1L) << 31)
  private static final long startOffset = - 1L << 31;
  // number of docs to generate for testing
  private static final int noDocs = 10000;
  
  // index and searcher are built once for all test methods in the static initializer
  private static final RAMDirectory directory;
  private static final IndexSearcher searcher;
  static {
    try {
      directory = new RAMDirectory();
      IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
      true, MaxFieldLength.UNLIMITED);
      
      // Add a series of noDocs docs with increasing long values
      for (int l=0; l<noDocs; l++) {
        Document doc=new Document();
        // add fields, that have a distance to test general functionality
        final long val=distance*l+startOffset;
        TrieUtils.addIndexedFields(doc,"field8", TrieUtils.trieCodeLong(val, 8));
        // additionally store the plain prefix-coded value (not indexed), so the
        // assertions below can decode the original long back from the hit documents
        doc.add(new Field("field8", TrieUtils.longToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        TrieUtils.addIndexedFields(doc,"field4", TrieUtils.trieCodeLong(val, 4));
        doc.add(new Field("field4", TrieUtils.longToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        TrieUtils.addIndexedFields(doc,"field2", TrieUtils.trieCodeLong(val, 2));
        doc.add(new Field("field2", TrieUtils.longToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
        // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
        TrieUtils.addIndexedFields(doc,"ascfield8", TrieUtils.trieCodeLong(l-(noDocs/2), 8));
        TrieUtils.addIndexedFields(doc,"ascfield4", TrieUtils.trieCodeLong(l-(noDocs/2), 4));
        TrieUtils.addIndexedFields(doc,"ascfield2", TrieUtils.trieCodeLong(l-(noDocs/2), 2));
        writer.addDocument(doc);
      }
      
      writer.optimize();
      writer.close();
      searcher=new IndexSearcher(directory);
    } catch (Exception e) {
      // a static initializer cannot throw checked exceptions; wrap and rethrow
      throw new Error(e);
    }
  }
  
  /**
   * Runs an inclusive bounded range over "field&lt;precisionStep&gt;" and checks
   * hit count plus decoded first/last values against the known doc spacing.
   */
  private void testRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    // lower lies between doc values 1 and 2; upper between values (count+1) and (count+2)
    long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
    LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
  }
  
  public void testRange_8bit() throws Exception {
    testRange(8);
  }
  
  public void testRange_4bit() throws Exception {
    testRange(4);
  }
  
  public void testRange_2bit() throws Exception {
    testRange(2);
  }
  
  /** Range with no lower bound (null): must match from the very first doc. */
  private void testLeftOpenRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    long upper=(count-1)*distance + (distance/3) + startOffset;
    LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, null, new Long(upper), true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
  }
  
  public void testLeftOpenRange_8bit() throws Exception {
    testLeftOpenRange(8);
  }
  
  public void testLeftOpenRange_4bit() throws Exception {
    testLeftOpenRange(4);
  }
  
  public void testLeftOpenRange_2bit() throws Exception {
    testLeftOpenRange(2);
  }
  
  /** Range with no upper bound (null): must match through the very last doc. */
  private void testRightOpenRange(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    int count=3000;
    long lower=(count-1)*distance + (distance/3) +startOffset;
    LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), null, true, true);
    TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
    System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", noDocs-count, sd.length );
    Document doc=searcher.doc(sd[0].doc);
    assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
    doc=searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get(field)) );
  }
  
  public void testRightOpenRange_8bit() throws Exception {
    testRightOpenRange(8);
  }
  
  public void testRightOpenRange_4bit() throws Exception {
    testRightOpenRange(4);
  }
  
  public void testRightOpenRange_2bit() throws Exception {
    testRightOpenRange(2);
  }
  
  /**
   * Cross-checks the trie filter against a constant-score classic RangeQuery on
   * the same prefix-coded terms; both must return identical hit counts for all
   * four inclusive/exclusive bound combinations.
   */
  private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="field"+precisionStep;
    // 50 random tests, the tests may also return 0 results, if min>max, but this is ok
    for (int i=0; i<50; i++) {
      long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
      long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
      // test inclusive range
      Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery();
      RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true);
      cq.setConstantScoreRewrite(true);
      TopDocs tTopDocs = searcher.search(tq, 1);
      TopDocs cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery();
      cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test left exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery();
      cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
      // test right exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery();
      cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false);
      cq.setConstantScoreRewrite(true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
    }
  }
  
  public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(8);
  }
  
  public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(4);
  }
  
  public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
    testRandomTrieAndClassicRangeQuery(2);
  }
  
  /**
   * On the dense "ascfield" (distance 1) the exact hit count of every
   * inclusive/exclusive combination is known in closed form; verifies the
   * range-splitting logic produces precisely those counts.
   */
  private void testRangeSplit(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="ascfield"+precisionStep;
    // 50 random tests
    for (int i=0; i<50; i++) {
      long lower=(long)(rnd.nextDouble()*noDocs - noDocs/2);
      long upper=(long)(rnd.nextDouble()*noDocs - noDocs/2);
      if (lower>upper) {
        // swap so that lower <= upper
        long a=lower; lower=upper; upper=a;
      }
      // test inclusive range
      Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery();
      TopDocs tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
      // test exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
      // test left exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
      // test right exclusive range
      tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery();
      tTopDocs = searcher.search(tq, 1);
      assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
    }
  }
  
  public void testRangeSplit_8bit() throws Exception {
    testRangeSplit(8);
  }
  
  public void testRangeSplit_4bit() throws Exception {
    testRangeSplit(4);
  }
  
  public void testRangeSplit_2bit() throws Exception {
    testRangeSplit(2);
  }
  
  /**
   * Verifies the trie long sort field: since the index order is ascending, a
   * reverse sort must yield strictly descending decoded values.
   */
  private void testSorting(int precisionStep) throws Exception {
    final Random rnd=newRandom();
    String field="field"+precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should return descending documents
    for (int i=0; i<10; i++) {
      long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
      long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
      if (lower>upper) {
        // swap so that lower <= upper
        long a=lower; lower=upper; upper=a;
      }
      Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery();
      TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true)));
      if (topDocs.totalHits==0) continue;  // empty result: nothing to compare
      ScoreDoc[] sd = topDocs.scoreDocs;
      assertNotNull(sd);
      long last=TrieUtils.prefixCodedToLong(searcher.doc(sd[0].doc).get(field));
      for (int j=1; j<sd.length; j++) {
        long act=TrieUtils.prefixCodedToLong(searcher.doc(sd[j].doc).get(field));
        assertTrue("Docs should be sorted backwards", last>act );
        last=act;
      }
    }
  }
  
  public void testSorting_8bit() throws Exception {
    testSorting(8);
  }
  
  public void testSorting_4bit() throws Exception {
    testSorting(4);
  }
  
  public void testSorting_2bit() throws Exception {
    testSorting(2);
  }
  
}
|
|
@ -1,265 +0,0 @@
|
|||
package org.apache.lucene.search.trie;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.RangeQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestTrieRangeQuery extends LuceneTestCase
|
||||
{
|
||||
private static final long distance=66666;
|
||||
|
||||
private static final RAMDirectory directory;
|
||||
private static final IndexSearcher searcher;
|
||||
static {
|
||||
try {
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, MaxFieldLength.UNLIMITED);
|
||||
|
||||
// Add a series of 10000 docs with increasing long values
|
||||
for (long l=0L; l<10000L; l++) {
|
||||
Document doc=new Document();
|
||||
// add fields, that have a distance to test general functionality
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field8", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field4", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field2", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
// add ascending fields with a distance of 1 to test the correct splitting of range and inclusive/exclusive
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield8", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield4", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield2", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
searcher=new IndexSearcher(directory);
|
||||
} catch (Exception e) {
|
||||
throw new Error(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long lower=96666L, upper=lower + count*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, true, variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be "+(2*distance), variant.trieCodedToLong(doc.get(field)), 2*distance );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((1+count)*distance), variant.trieCodedToLong(doc.get(field)), (1+count)*distance );
|
||||
}
|
||||
|
||||
public void testRange_8bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRange_4bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRange_2bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testLeftOpenRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long upper=(count-1)*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, null, new Long(upper), true, true, variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be 0", variant.trieCodedToLong(doc.get(field)), 0L );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((count-1)*distance), variant.trieCodedToLong(doc.get(field)), (count-1)*distance );
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_8bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_4bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_2bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRandomTrieAndClassicRangeQuery(final TrieUtils variant) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 50 random tests, the tests may also return 0 results, if min>max, but this is ok
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
// test inclusive range
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, true, variant);
|
||||
RangeQuery cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), true, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
// test exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), false, false, variant);
|
||||
cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), false, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
// test left exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), false, true, variant);
|
||||
cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), false, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
// test right exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, false, variant);
|
||||
cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), true, false);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRangeSplit(final TrieUtils variant) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="ascfield"+variant.TRIE_BITS;
|
||||
// 50 random tests
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L);
|
||||
long upper=(long)(rnd.nextDouble()*10000L);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
// test inclusive range
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, true, variant);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to inclusive range length", tTopDocs.totalHits, upper-lower+1 );
|
||||
// test exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), false, false, variant);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to exclusive range length", tTopDocs.totalHits, Math.max(upper-lower-1, 0) );
|
||||
// test left exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), false, true, variant);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", tTopDocs.totalHits, upper-lower );
|
||||
// test right exclusive range
|
||||
tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, false, variant);
|
||||
tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to half exclusive range length", tTopDocs.totalHits, upper-lower );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeSplit_8bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_4bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_2bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testSorting(final TrieUtils variant) throws Exception {
|
||||
final Random rnd=newRandom();
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should retun descending documents
|
||||
for (int i=0; i<10; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), true, true, variant);
|
||||
TopDocs topDocs = searcher.search(tq, null, 10000, new Sort(variant.getSortField(field, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=variant.trieCodedToLong(searcher.doc(sd[0].doc).get(field));
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=variant.trieCodedToLong(searcher.doc(sd[j].doc).get(field));
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,155 +17,304 @@ package org.apache.lucene.search.trie;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.GregorianCalendar;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class TestTrieUtils extends LuceneTestCase {
|
||||
|
||||
public void testSpecialValues() throws Exception {
|
||||
// Variant 8bit values
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MAX, "\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(-1), "\u017f\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(0), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(1), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// Variant 4bit values
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MAX, "\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(-1), "\u0107\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(0), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(1), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// TODO: 2bit tests
|
||||
}
|
||||
|
||||
private void testBinaryOrderingAndIncrement(TrieUtils variant) throws Exception {
|
||||
public void testLongConversionAndOrdering() throws Exception {
|
||||
// generate a series of encoded longs, each numerical one bigger than the one before
|
||||
String last=null;
|
||||
for (long l=-100000L; l<100000L; l++) {
|
||||
String act=variant.longToTrieCoded(l);
|
||||
String act=TrieUtils.longToPrefixCoded(l);
|
||||
if (last!=null) {
|
||||
// test if smaller
|
||||
assertTrue( last.compareTo(act) < 0 );
|
||||
// test the increment method (the last incremented by one should be the actual)
|
||||
assertEquals( variant.incrementTrieCoded(last), act );
|
||||
// test the decrement method (the actual decremented by one should be the last)
|
||||
assertEquals( last, variant.decrementTrieCoded(act) );
|
||||
assertTrue("actual bigger than last", last.compareTo(act) < 0 );
|
||||
}
|
||||
// test is back and forward conversion works
|
||||
assertEquals("forward and back conversion should generate same long", l, TrieUtils.prefixCodedToLong(act));
|
||||
// next step
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_8bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
public void testIntConversionAndOrdering() throws Exception {
|
||||
// generate a series of encoded ints, each numerical one bigger than the one before
|
||||
String last=null;
|
||||
for (int i=-100000; i<100000; i++) {
|
||||
String act=TrieUtils.intToPrefixCoded(i);
|
||||
if (last!=null) {
|
||||
// test if smaller
|
||||
assertTrue("actual bigger than last", last.compareTo(act) < 0 );
|
||||
}
|
||||
// test is back and forward conversion works
|
||||
assertEquals("forward and back conversion should generate same int", i, TrieUtils.prefixCodedToInt(act));
|
||||
// next step
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_4bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_2bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
private void testLongs(TrieUtils variant) throws Exception {
|
||||
public void testLongSpecialValues() throws Exception {
|
||||
long[] vals=new long[]{
|
||||
Long.MIN_VALUE, -5000L, -4000L, -3000L, -2000L, -1000L, 0L,
|
||||
1L, 10L, 300L, 5000L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
|
||||
Long.MIN_VALUE, Long.MIN_VALUE+1, Long.MIN_VALUE+2, -5003400000000L,
|
||||
-4000L, -3000L, -2000L, -1000L, -1L, 0L, 1L, 10L, 300L, 50006789999999999L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
String[] prefixVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.longToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToLong(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToLongAuto(trieVals[i]) );
|
||||
}
|
||||
prefixVals[i]=TrieUtils.longToPrefixCoded(vals[i]);
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
// check forward and back conversion
|
||||
assertEquals( "forward and back conversion should generate same long", vals[i], TrieUtils.prefixCodedToLong(prefixVals[i]) );
|
||||
|
||||
// test if decoding values as int fails correctly
|
||||
try {
|
||||
TrieUtils.prefixCodedToInt(prefixVals[i]);
|
||||
fail("decoding a prefix coded long value as int should fail");
|
||||
} catch (NumberFormatException e) {
|
||||
// worked
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongs_8bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_8BIT);
|
||||
// check sort order (prefixVals should be ascending)
|
||||
for (int i=1; i<prefixVals.length; i++) {
|
||||
assertTrue( "check sort order", prefixVals[i-1].compareTo( prefixVals[i] ) < 0 );
|
||||
}
|
||||
|
||||
public void testLongs_4bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_4BIT);
|
||||
// check the prefix encoding, lower precision should have the difference to original value equal to the lower removed bits
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
for (int j=0; j<64; j++) {
|
||||
long prefixVal=TrieUtils.prefixCodedToLong(TrieUtils.longToPrefixCoded(vals[i], j));
|
||||
long mask=(1L << j) - 1L;
|
||||
assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongs_2bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_2BIT);
|
||||
public void testIntSpecialValues() throws Exception {
|
||||
int[] vals=new int[]{
|
||||
Integer.MIN_VALUE, Integer.MIN_VALUE+1, Integer.MIN_VALUE+2, -64765767,
|
||||
-4000, -3000, -2000, -1000, -1, 0, 1, 10, 300, 765878989, Integer.MAX_VALUE-2, Integer.MAX_VALUE-1, Integer.MAX_VALUE
|
||||
};
|
||||
String[] prefixVals=new String[vals.length];
|
||||
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
prefixVals[i]=TrieUtils.intToPrefixCoded(vals[i]);
|
||||
|
||||
// check forward and back conversion
|
||||
assertEquals( "forward and back conversion should generate same int", vals[i], TrieUtils.prefixCodedToInt(prefixVals[i]) );
|
||||
|
||||
// test if decoding values as long fails correctly
|
||||
try {
|
||||
TrieUtils.prefixCodedToLong(prefixVals[i]);
|
||||
fail("decoding a prefix coded int value as long should fail");
|
||||
} catch (NumberFormatException e) {
|
||||
// worked
|
||||
}
|
||||
}
|
||||
|
||||
private void testDoubles(TrieUtils variant) throws Exception {
|
||||
// check sort order (prefixVals should be ascending)
|
||||
for (int i=1; i<prefixVals.length; i++) {
|
||||
assertTrue( "check sort order", prefixVals[i-1].compareTo( prefixVals[i] ) < 0 );
|
||||
}
|
||||
|
||||
// check the prefix encoding, lower precision should have the difference to original value equal to the lower removed bits
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
for (int j=0; j<32; j++) {
|
||||
int prefixVal=TrieUtils.prefixCodedToInt(TrieUtils.intToPrefixCoded(vals[i], j));
|
||||
int mask=(1 << j) - 1;
|
||||
assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDoubles() throws Exception {
|
||||
double[] vals=new double[]{
|
||||
Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0,
|
||||
+0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
long[] longVals=new long[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
// check forward and back conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.doubleToTrieCoded(vals[i]);
|
||||
assertTrue( "Back and forth conversion should return same value", vals[i]==variant.trieCodedToDouble(trieVals[i]) );
|
||||
assertTrue( "Automatic back conversion with encoding detection should return same value", vals[i]==TrieUtils.trieCodedToDoubleAuto(trieVals[i]) );
|
||||
longVals[i]=TrieUtils.doubleToSortableLong(vals[i]);
|
||||
assertTrue( "forward and back conversion should generate same double", Double.compare(vals[i], TrieUtils.sortableLongToDouble(longVals[i]))==0 );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
// check sort order (prefixVals should be ascending)
|
||||
for (int i=1; i<longVals.length; i++) {
|
||||
assertTrue( "check sort order", longVals[i-1] < longVals[i] );
|
||||
}
|
||||
}
|
||||
|
||||
public void testDoubles_8bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testDoubles_4bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testDoubles_2bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testDates(TrieUtils variant) throws Exception {
|
||||
Date[] vals=new Date[]{
|
||||
new GregorianCalendar(1000,1,1).getTime(),
|
||||
new GregorianCalendar(1999,1,1).getTime(),
|
||||
new GregorianCalendar(2000,1,1).getTime(),
|
||||
new GregorianCalendar(2001,1,1).getTime()
|
||||
public void testFloats() throws Exception {
|
||||
float[] vals=new float[]{
|
||||
Float.NEGATIVE_INFINITY, -2.3E25f, -1.0E15f, -1.0f, -1.0E-1f, -1.0E-2f, -0.0f,
|
||||
+0.0f, 1.0E-2f, 1.0E-1f, 1.0f, 1.0E15f, 2.3E25f, Float.POSITIVE_INFINITY
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
int[] intVals=new int[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
// check forward and back conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.dateToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToDate(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToDateAuto(trieVals[i]) );
|
||||
intVals[i]=TrieUtils.floatToSortableInt(vals[i]);
|
||||
assertTrue( "forward and back conversion should generate same double", Float.compare(vals[i], TrieUtils.sortableIntToFloat(intVals[i]))==0 );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
// check sort order (prefixVals should be ascending)
|
||||
for (int i=1; i<intVals.length; i++) {
|
||||
assertTrue( "check sort order", intVals[i-1] < intVals[i] );
|
||||
}
|
||||
}
|
||||
|
||||
public void testDates_8bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_8BIT);
|
||||
// INFO: Tests for trieCodeLong()/trieCodeInt() not needed because implicitely tested by range filter tests
|
||||
|
||||
/** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */
|
||||
protected void assertLongRangeSplit(final long lower, final long upper, int precisionStep,
|
||||
final boolean useBitSet, final Iterator neededBounds
|
||||
) throws Exception {
|
||||
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
|
||||
|
||||
TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
|
||||
public void addRange(int precisionStep, long min, long max, int shift) {
|
||||
assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
|
||||
if (useBitSet) for (long l=min; l<=max; l++) {
|
||||
assertFalse("ranges should not overlap", bits.getAndSet(l-lower) );
|
||||
}
|
||||
// make unsigned longs for easier display and understanding
|
||||
min ^= 0x8000000000000000L;
|
||||
max ^= 0x8000000000000000L;
|
||||
//System.out.println("new Long(0x"+Long.toHexString(min>>>shift)+"L),new Long(0x"+Long.toHexString(max>>>shift)+"L),");
|
||||
assertEquals( "inner min bound", ((Long)neededBounds.next()).longValue(), min>>>shift);
|
||||
assertEquals( "inner max bound", ((Long)neededBounds.next()).longValue(), max>>>shift);
|
||||
}
|
||||
}, precisionStep, lower, upper);
|
||||
|
||||
if (useBitSet) {
|
||||
// after flipping all bits in the range, the cardinality should be zero
|
||||
bits.flip(0,upper-lower+1);
|
||||
assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
public void testDates_4bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_4BIT);
|
||||
public void testSplitLongRange() throws Exception {
|
||||
// a hard-coded "standard" range
|
||||
assertLongRangeSplit(-5000L, 9500L, 4, true, Arrays.asList(new Long[]{
|
||||
new Long(0x7fffffffffffec78L),new Long(0x7fffffffffffec7fL),
|
||||
new Long(0x8000000000002510L),new Long(0x800000000000251cL),
|
||||
new Long(0x7fffffffffffec8L), new Long(0x7fffffffffffecfL),
|
||||
new Long(0x800000000000250L), new Long(0x800000000000250L),
|
||||
new Long(0x7fffffffffffedL), new Long(0x7fffffffffffefL),
|
||||
new Long(0x80000000000020L), new Long(0x80000000000024L),
|
||||
new Long(0x7ffffffffffffL), new Long(0x8000000000001L)
|
||||
}).iterator());
|
||||
|
||||
// the same with no range splitting
|
||||
assertLongRangeSplit(-5000L, 9500L, 64, true, Arrays.asList(new Long[]{
|
||||
new Long(0x7fffffffffffec78L),new Long(0x800000000000251cL)
|
||||
}).iterator());
|
||||
|
||||
// this tests optimized range splitting, if one of the inner bounds
|
||||
// is also the bound of the next lower precision, it should be used completely
|
||||
assertLongRangeSplit(0L, 1024L+63L, 4, true, Arrays.asList(new Long[]{
|
||||
new Long(0x800000000000040L), new Long(0x800000000000043L),
|
||||
new Long(0x80000000000000L), new Long(0x80000000000003L)
|
||||
}).iterator());
|
||||
|
||||
// the full long range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-)
|
||||
assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 8, false, Arrays.asList(new Long[]{
|
||||
new Long(0x00L),new Long(0xffL)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=4
|
||||
assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 4, false, Arrays.asList(new Long[]{
|
||||
new Long(0x0L),new Long(0xfL)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=2
|
||||
assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 2, false, Arrays.asList(new Long[]{
|
||||
new Long(0x0L),new Long(0x3L)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=1
|
||||
assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 1, false, Arrays.asList(new Long[]{
|
||||
new Long(0x0L),new Long(0x1L)
|
||||
}).iterator());
|
||||
}
|
||||
|
||||
public void testDates_2bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_2BIT);
|
||||
/** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */
|
||||
protected void assertIntRangeSplit(final int lower, final int upper, int precisionStep,
|
||||
final boolean useBitSet, final Iterator neededBounds
|
||||
) throws Exception {
|
||||
final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
|
||||
|
||||
TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
|
||||
public void addRange(int precisionStep, int min, int max, int shift) {
|
||||
assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
|
||||
if (useBitSet) for (int i=min; i<=max; i++) {
|
||||
assertFalse("ranges should not overlap", bits.getAndSet(i-lower) );
|
||||
}
|
||||
// make unsigned ints for easier display and understanding
|
||||
min ^= 0x80000000;
|
||||
max ^= 0x80000000;
|
||||
//System.out.println("new Integer(0x"+Integer.toHexString(min>>>shift)+"),new Integer(0x"+Integer.toHexString(max>>>shift)+"),");
|
||||
assertEquals( "inner min bound", ((Integer)neededBounds.next()).intValue(), min>>>shift);
|
||||
assertEquals( "inner max bound", ((Integer)neededBounds.next()).intValue(), max>>>shift);
|
||||
}
|
||||
}, precisionStep, lower, upper);
|
||||
|
||||
if (useBitSet) {
|
||||
// after flipping all bits in the range, the cardinality should be zero
|
||||
bits.flip(0,upper-lower+1);
|
||||
assertTrue("The sub-range concenated should match the whole range", bits.isEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
public void testSplitIntRange() throws Exception {
|
||||
// a hard-coded "standard" range
|
||||
assertIntRangeSplit(-5000, 9500, 4, true, Arrays.asList(new Integer[]{
|
||||
new Integer(0x7fffec78),new Integer(0x7fffec7f),
|
||||
new Integer(0x80002510),new Integer(0x8000251c),
|
||||
new Integer(0x7fffec8), new Integer(0x7fffecf),
|
||||
new Integer(0x8000250), new Integer(0x8000250),
|
||||
new Integer(0x7fffed), new Integer(0x7fffef),
|
||||
new Integer(0x800020), new Integer(0x800024),
|
||||
new Integer(0x7ffff), new Integer(0x80001)
|
||||
}).iterator());
|
||||
|
||||
// the same with no range splitting
|
||||
assertIntRangeSplit(-5000, 9500, 32, true, Arrays.asList(new Integer[]{
|
||||
new Integer(0x7fffec78),new Integer(0x8000251c)
|
||||
}).iterator());
|
||||
|
||||
// this tests optimized range splitting, if one of the inner bounds
|
||||
// is also the bound of the next lower precision, it should be used completely
|
||||
assertIntRangeSplit(0, 1024+63, 4, true, Arrays.asList(new Integer[]{
|
||||
new Integer(0x8000040), new Integer(0x8000043),
|
||||
new Integer(0x800000), new Integer(0x800003)
|
||||
}).iterator());
|
||||
|
||||
// the full int range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-)
|
||||
assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 8, false, Arrays.asList(new Integer[]{
|
||||
new Integer(0x00),new Integer(0xff)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=4
|
||||
assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 4, false, Arrays.asList(new Integer[]{
|
||||
new Integer(0x0),new Integer(0xf)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=2
|
||||
assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 2, false, Arrays.asList(new Integer[]{
|
||||
new Integer(0x0),new Integer(0x3)
|
||||
}).iterator());
|
||||
|
||||
// the same with precisionStep=1
|
||||
assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 1, false, Arrays.asList(new Integer[]{
|
||||
new Integer(0x0),new Integer(0x1)
|
||||
}).iterator());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue