mirror of https://github.com/apache/lucene.git
As suggested by Otis: Convert TABs to spaces, remove @author javadoc tags
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@733443 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d7376608b2
commit
c2b5e91ddd
|
@ -33,258 +33,257 @@ import org.apache.lucene.util.OpenBitSet;
|
|||
* This filter depends on a specific structure of terms in the index that can only be created
|
||||
* by {@link TrieUtils} methods.
|
||||
* For more information, how the algorithm works, see the package description {@link org.apache.lucene.search.trie}.
|
||||
* @author Uwe Schindler (panFMP developer)
|
||||
*/
|
||||
public final class TrieRangeFilter extends Filter {
|
||||
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final String min, final String max, final TrieUtils variant) {
|
||||
if (min==null && max==null) throw new IllegalArgumentException("The min and max values cannot be both null.");
|
||||
this.trieVariant=variant;
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
this.min=(min==null) ? trieVariant.TRIE_CODED_NUMERIC_MIN : min;
|
||||
this.max=(max==null) ? trieVariant.TRIE_CODED_NUMERIC_MAX : max;
|
||||
this.field=field.intern();
|
||||
}
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final String min, final String max, final TrieUtils variant) {
|
||||
if (min==null && max==null) throw new IllegalArgumentException("The min and max values cannot be both null.");
|
||||
this.trieVariant=variant;
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
this.min=(min==null) ? trieVariant.TRIE_CODED_NUMERIC_MIN : min;
|
||||
this.max=(max==null) ? trieVariant.TRIE_CODED_NUMERIC_MAX : max;
|
||||
this.field=field.intern();
|
||||
}
|
||||
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie package returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final String min, final String max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Double min, final Double max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.doubleToTrieCoded(min.doubleValue()),
|
||||
(max==null) ? null : variant.doubleToTrieCoded(max.doubleValue()),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie package returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final String min, final String max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Double min, final Double max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.doubleToTrieCoded(min.doubleValue()),
|
||||
(max==null) ? null : variant.doubleToTrieCoded(max.doubleValue()),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Double min, final Double max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Double min, final Double max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Date min, final Date max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.dateToTrieCoded(min),
|
||||
(max==null) ? null : variant.dateToTrieCoded(max),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Date min, final Date max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.dateToTrieCoded(min),
|
||||
(max==null) ? null : variant.dateToTrieCoded(max),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Date min, final Date max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Date min, final Date max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Long min, final Long max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.longToTrieCoded(min.longValue()),
|
||||
(max==null) ? null : variant.longToTrieCoded(max.longValue()),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Long min, final Long max, final TrieUtils variant) {
|
||||
this(
|
||||
field,
|
||||
(min==null) ? null : variant.longToTrieCoded(min.longValue()),
|
||||
(max==null) ? null : variant.longToTrieCoded(max.longValue()),
|
||||
variant
|
||||
);
|
||||
this.minUnconverted=min;
|
||||
this.maxUnconverted=max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Long min, final Long max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeFilter(final String field, final Long min, final Long max) {
|
||||
this(field,min,max,TrieUtils.getDefaultTrieVariant());
|
||||
}
|
||||
|
||||
//@Override
|
||||
public String toString() {
|
||||
return toString(null);
|
||||
}
|
||||
//@Override
|
||||
public String toString() {
|
||||
return toString(null);
|
||||
}
|
||||
|
||||
public String toString(final String field) {
|
||||
final StringBuffer sb=new StringBuffer();
|
||||
if (!this.field.equals(field)) sb.append(this.field).append(':');
|
||||
return sb.append('[').append(minUnconverted).append(" TO ").append(maxUnconverted).append(']').toString();
|
||||
}
|
||||
public String toString(final String field) {
|
||||
final StringBuffer sb=new StringBuffer();
|
||||
if (!this.field.equals(field)) sb.append(this.field).append(':');
|
||||
return sb.append('[').append(minUnconverted).append(" TO ").append(maxUnconverted).append(']').toString();
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o instanceof TrieRangeFilter) {
|
||||
TrieRangeFilter q=(TrieRangeFilter)o;
|
||||
// trieVariants are singleton per type, so no equals needed
|
||||
return (field==q.field && min.equals(q.min) && max.equals(q.max) && trieVariant==q.trieVariant);
|
||||
} else return false;
|
||||
}
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o instanceof TrieRangeFilter) {
|
||||
TrieRangeFilter q=(TrieRangeFilter)o;
|
||||
// trieVariants are singleton per type, so no equals needed
|
||||
return (field==q.field && min.equals(q.min) && max.equals(q.max) && trieVariant==q.trieVariant);
|
||||
} else return false;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
// the hash code uses from the variant only the number of bits, as this is unique for the variant
|
||||
return field.hashCode()+(min.hashCode()^0x14fa55fb)+(max.hashCode()^0x733fa5fe)+(trieVariant.TRIE_BITS^0x64365465);
|
||||
}
|
||||
|
||||
/** prints the String in hexadecimal \\u notation (for debugging of <code>setBits()</code>) */
|
||||
private String stringToHexDigits(final String s) {
|
||||
StringBuffer sb=new StringBuffer(s.length()*3);
|
||||
for (int i=0,c=s.length(); i<c; i++) {
|
||||
char ch=s.charAt(i);
|
||||
sb.append("\\u").append(Integer.toHexString((int)ch));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
// the hash code uses from the variant only the number of bits, as this is unique for the variant
|
||||
return field.hashCode()+(min.hashCode()^0x14fa55fb)+(max.hashCode()^0x733fa5fe)+(trieVariant.TRIE_BITS^0x64365465);
|
||||
}
|
||||
|
||||
/** prints the String in hexadecimal \\u notation (for debugging of <code>setBits()</code>) */
|
||||
private String stringToHexDigits(final String s) {
|
||||
StringBuffer sb=new StringBuffer(s.length()*3);
|
||||
for (int i=0,c=s.length(); i<c; i++) {
|
||||
char ch=s.charAt(i);
|
||||
sb.append("\\u").append(Integer.toHexString((int)ch));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Marks documents in a specific range. Code borrowed from original RangeFilter and simplified (and returns number of terms) */
|
||||
private int setBits(final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits, String lowerTerm, String upperTerm) throws IOException {
|
||||
//System.out.println(stringToHexDigits(lowerTerm)+" TO "+stringToHexDigits(upperTerm));
|
||||
int count=0,len=lowerTerm.length();
|
||||
final String field;
|
||||
if (len<trieVariant.TRIE_CODED_LENGTH) {
|
||||
// lower precision value is in helper field
|
||||
field=(this.field + trieVariant.LOWER_PRECISION_FIELD_NAME_SUFFIX).intern();
|
||||
// add padding before lower precision values to group them
|
||||
lowerTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(lowerTerm).toString();
|
||||
upperTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(upperTerm).toString();
|
||||
// length is longer by 1 char because of padding
|
||||
len++;
|
||||
} else {
|
||||
// full precision value is in original field
|
||||
field=this.field;
|
||||
}
|
||||
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
|
||||
try {
|
||||
do {
|
||||
final Term term = enumerator.term();
|
||||
if (term!=null && term.field()==field) {
|
||||
// break out when upperTerm reached or length of term is different
|
||||
final String t=term.text();
|
||||
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
|
||||
// we have a good term, find the docs
|
||||
count++;
|
||||
termDocs.seek(enumerator);
|
||||
while (termDocs.next()) bits.set(termDocs.doc());
|
||||
} else break;
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
/** Marks documents in a specific range. Code borrowed from original RangeFilter and simplified (and returns number of terms) */
|
||||
private int setBits(final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits, String lowerTerm, String upperTerm) throws IOException {
|
||||
//System.out.println(stringToHexDigits(lowerTerm)+" TO "+stringToHexDigits(upperTerm));
|
||||
int count=0,len=lowerTerm.length();
|
||||
final String field;
|
||||
if (len<trieVariant.TRIE_CODED_LENGTH) {
|
||||
// lower precision value is in helper field
|
||||
field=(this.field + trieVariant.LOWER_PRECISION_FIELD_NAME_SUFFIX).intern();
|
||||
// add padding before lower precision values to group them
|
||||
lowerTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(lowerTerm).toString();
|
||||
upperTerm=new StringBuffer(len+1).append((char)(trieVariant.TRIE_CODED_PADDING_START+len)).append(upperTerm).toString();
|
||||
// length is longer by 1 char because of padding
|
||||
len++;
|
||||
} else {
|
||||
// full precision value is in original field
|
||||
field=this.field;
|
||||
}
|
||||
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
|
||||
try {
|
||||
do {
|
||||
final Term term = enumerator.term();
|
||||
if (term!=null && term.field()==field) {
|
||||
// break out when upperTerm reached or length of term is different
|
||||
final String t=term.text();
|
||||
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
|
||||
// we have a good term, find the docs
|
||||
count++;
|
||||
termDocs.seek(enumerator);
|
||||
while (termDocs.next()) bits.set(termDocs.doc());
|
||||
} else break;
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/** Splits range recursively (and returns number of terms) */
|
||||
private int splitRange(
|
||||
final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
|
||||
final String min, final boolean lowerBoundOpen, final String max, final boolean upperBoundOpen
|
||||
) throws IOException {
|
||||
int count=0;
|
||||
final int length=min.length();
|
||||
final String minShort=lowerBoundOpen ? min.substring(0,length-1) : trieVariant.incrementTrieCoded(min.substring(0,length-1));
|
||||
final String maxShort=upperBoundOpen ? max.substring(0,length-1) : trieVariant.decrementTrieCoded(max.substring(0,length-1));
|
||||
/** Splits range recursively (and returns number of terms) */
|
||||
private int splitRange(
|
||||
final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
|
||||
final String min, final boolean lowerBoundOpen, final String max, final boolean upperBoundOpen
|
||||
) throws IOException {
|
||||
int count=0;
|
||||
final int length=min.length();
|
||||
final String minShort=lowerBoundOpen ? min.substring(0,length-1) : trieVariant.incrementTrieCoded(min.substring(0,length-1));
|
||||
final String maxShort=upperBoundOpen ? max.substring(0,length-1) : trieVariant.decrementTrieCoded(max.substring(0,length-1));
|
||||
|
||||
if (length==1 || minShort.compareTo(maxShort)>=0) {
|
||||
// we are in the lowest precision or the current precision is not existent
|
||||
count+=setBits(reader, termDocs, bits, min, max);
|
||||
} else {
|
||||
// Avoid too much seeking: first go deeper into lower precision
|
||||
// (in IndexReader's TermEnum these terms are earlier).
|
||||
// Do this only, if the current length is not trieVariant.TRIE_CODED_LENGTH (not full precision),
|
||||
// because terms from the highest prec come before all lower prec terms
|
||||
// (because the field name is ordered before the suffixed one).
|
||||
if (length!=trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
|
||||
reader,termDocs,bits,
|
||||
minShort,lowerBoundOpen,
|
||||
maxShort,upperBoundOpen
|
||||
);
|
||||
// Avoid too much seeking: set bits for lower part of current (higher) precision.
|
||||
// These terms come later in IndexReader's TermEnum.
|
||||
if (!lowerBoundOpen) {
|
||||
count+=setBits(reader, termDocs, bits, min, trieVariant.decrementTrieCoded(minShort+trieVariant.TRIE_CODED_SYMBOL_MIN));
|
||||
}
|
||||
// Avoid too much seeking: set bits for upper part of current precision.
|
||||
// These terms come later in IndexReader's TermEnum.
|
||||
if (!upperBoundOpen) {
|
||||
count+=setBits(reader, termDocs, bits, trieVariant.incrementTrieCoded(maxShort+trieVariant.TRIE_CODED_SYMBOL_MAX), max);
|
||||
}
|
||||
// If the first step (see above) was not done (because length==trieVariant.TRIE_CODED_LENGTH) we do it now.
|
||||
if (length==trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
|
||||
reader,termDocs,bits,
|
||||
minShort,lowerBoundOpen,
|
||||
maxShort,upperBoundOpen
|
||||
);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
if (length==1 || minShort.compareTo(maxShort)>=0) {
|
||||
// we are in the lowest precision or the current precision is not existent
|
||||
count+=setBits(reader, termDocs, bits, min, max);
|
||||
} else {
|
||||
// Avoid too much seeking: first go deeper into lower precision
|
||||
// (in IndexReader's TermEnum these terms are earlier).
|
||||
// Do this only, if the current length is not trieVariant.TRIE_CODED_LENGTH (not full precision),
|
||||
// because terms from the highest prec come before all lower prec terms
|
||||
// (because the field name is ordered before the suffixed one).
|
||||
if (length!=trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
|
||||
reader,termDocs,bits,
|
||||
minShort,lowerBoundOpen,
|
||||
maxShort,upperBoundOpen
|
||||
);
|
||||
// Avoid too much seeking: set bits for lower part of current (higher) precision.
|
||||
// These terms come later in IndexReader's TermEnum.
|
||||
if (!lowerBoundOpen) {
|
||||
count+=setBits(reader, termDocs, bits, min, trieVariant.decrementTrieCoded(minShort+trieVariant.TRIE_CODED_SYMBOL_MIN));
|
||||
}
|
||||
// Avoid too much seeking: set bits for upper part of current precision.
|
||||
// These terms come later in IndexReader's TermEnum.
|
||||
if (!upperBoundOpen) {
|
||||
count+=setBits(reader, termDocs, bits, trieVariant.incrementTrieCoded(maxShort+trieVariant.TRIE_CODED_SYMBOL_MAX), max);
|
||||
}
|
||||
// If the first step (see above) was not done (because length==trieVariant.TRIE_CODED_LENGTH) we do it now.
|
||||
if (length==trieVariant.TRIE_CODED_LENGTH) count+=splitRange(
|
||||
reader,termDocs,bits,
|
||||
minShort,lowerBoundOpen,
|
||||
maxShort,upperBoundOpen
|
||||
);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
final TermDocs termDocs=reader.termDocs();
|
||||
try {
|
||||
final int count=splitRange(
|
||||
reader,termDocs,bits,
|
||||
min,trieVariant.TRIE_CODED_NUMERIC_MIN.equals(min),
|
||||
max,trieVariant.TRIE_CODED_NUMERIC_MAX.equals(max)
|
||||
);
|
||||
lastNumberOfTerms=new Integer(count);
|
||||
//System.out.println("Found "+count+" distinct terms in filtered range for field '"+field+"'.");
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERT: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
|
||||
* This may be used for performance comparisons of different trie variants and their effectiveness.
|
||||
* This method is not thread safe, be sure to only call it when no query is running!
|
||||
* @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
|
||||
*/
|
||||
//@Override
|
||||
public int getLastNumberOfTerms() {
|
||||
if (lastNumberOfTerms==null) throw new IllegalStateException();
|
||||
return lastNumberOfTerms.intValue();
|
||||
}
|
||||
/**
|
||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
final TermDocs termDocs=reader.termDocs();
|
||||
try {
|
||||
final int count=splitRange(
|
||||
reader,termDocs,bits,
|
||||
min,trieVariant.TRIE_CODED_NUMERIC_MIN.equals(min),
|
||||
max,trieVariant.TRIE_CODED_NUMERIC_MAX.equals(max)
|
||||
);
|
||||
lastNumberOfTerms=new Integer(count);
|
||||
//System.out.println("Found "+count+" distinct terms in filtered range for field '"+field+"'.");
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERT: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
|
||||
* This may be used for performance comparisons of different trie variants and their effectiveness.
|
||||
* This method is not thread safe, be sure to only call it when no query is running!
|
||||
* @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
|
||||
*/
|
||||
//@Override
|
||||
public int getLastNumberOfTerms() {
|
||||
if (lastNumberOfTerms==null) throw new IllegalStateException();
|
||||
return lastNumberOfTerms.intValue();
|
||||
}
|
||||
|
||||
// members
|
||||
private final String field,min,max;
|
||||
private final TrieUtils trieVariant;
|
||||
private Object minUnconverted,maxUnconverted;
|
||||
private Integer lastNumberOfTerms=null;
|
||||
// members
|
||||
private final String field,min,max;
|
||||
private final TrieUtils trieVariant;
|
||||
private Object minUnconverted,maxUnconverted;
|
||||
private Integer lastNumberOfTerms=null;
|
||||
}
|
||||
|
|
|
@ -31,115 +31,114 @@ import org.apache.lucene.index.IndexReader;
|
|||
* by {@link TrieUtils} methods.
|
||||
* <p>This class wraps a {@link TrieRangeFilter} using a {@link ConstantScoreQuery}.
|
||||
* @see TrieRangeFilter
|
||||
* @author Uwe Schindler (panFMP developer)
|
||||
*/
|
||||
public final class TrieRangeQuery extends Query {
|
||||
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final String min, final String max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final String min, final String max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final String min, final String max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
/**
|
||||
* Universal constructor (expert use only): Uses already trie-converted min/max values.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final String min, final String max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Double min, final Double max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Double min, final Double max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Double min, final Double max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in numeric form (double).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Double min, final Double max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Date min, final Date max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Date min, final Date max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Date min, final Date max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in date/time form.
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Date min, final Date max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Long min, final Long max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
* <p>This constructor uses the trie variant returned by {@link TrieUtils#getDefaultTrieVariant()}.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Long min, final Long max) {
|
||||
filter=new TrieRangeFilter(field,min,max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Long min, final Long max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
/**
|
||||
* Generates a trie query using the supplied field with range bounds in integer form (long).
|
||||
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
|
||||
*/
|
||||
public TrieRangeQuery(final String field, final Long min, final Long max, final TrieUtils variant) {
|
||||
filter=new TrieRangeFilter(field,min,max,variant);
|
||||
}
|
||||
|
||||
//@Override
|
||||
public String toString(final String field) {
|
||||
return filter.toString(field)+ToStringUtils.boost(getBoost());
|
||||
}
|
||||
//@Override
|
||||
public String toString(final String field) {
|
||||
return filter.toString(field)+ToStringUtils.boost(getBoost());
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o instanceof TrieRangeQuery) {
|
||||
TrieRangeQuery q=(TrieRangeQuery)o;
|
||||
return (filter.equals(q.filter) && getBoost()==q.getBoost());
|
||||
} else return false;
|
||||
}
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o instanceof TrieRangeQuery) {
|
||||
TrieRangeQuery q=(TrieRangeQuery)o;
|
||||
return (filter.equals(q.filter) && getBoost()==q.getBoost());
|
||||
} else return false;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
return filter.hashCode()^0x1756fa55+Float.floatToIntBits(getBoost());
|
||||
}
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
return filter.hashCode()^0x1756fa55+Float.floatToIntBits(getBoost());
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrites the query to native Lucene {@link Query}'s. This implementation uses a {@link ConstantScoreQuery} with
|
||||
* a {@link TrieRangeFilter} as implementation of the trie algorithm.
|
||||
*/
|
||||
//@Override
|
||||
public Query rewrite(final IndexReader reader) throws IOException {
|
||||
final ConstantScoreQuery q = new ConstantScoreQuery(filter);
|
||||
q.setBoost(getBoost());
|
||||
return q.rewrite(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the underlying filter.
|
||||
*/
|
||||
public TrieRangeFilter getFilter() {
|
||||
return filter;
|
||||
}
|
||||
/**
|
||||
* Rewrites the query to native Lucene {@link Query}'s. This implementation uses a {@link ConstantScoreQuery} with
|
||||
* a {@link TrieRangeFilter} as implementation of the trie algorithm.
|
||||
*/
|
||||
//@Override
|
||||
public Query rewrite(final IndexReader reader) throws IOException {
|
||||
final ConstantScoreQuery q = new ConstantScoreQuery(filter);
|
||||
q.setBoost(getBoost());
|
||||
return q.rewrite(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the underlying filter.
|
||||
*/
|
||||
public TrieRangeFilter getFilter() {
|
||||
return filter;
|
||||
}
|
||||
|
||||
// members
|
||||
private final TrieRangeFilter filter;
|
||||
// members
|
||||
private final TrieRangeFilter filter;
|
||||
|
||||
}
|
||||
|
|
|
@ -47,373 +47,373 @@ import org.apache.lucene.search.ExtendedFieldCache;
|
|||
* By this it is possible to get the correct enumeration of terms in correct precision
|
||||
* of the term list by just jumping to the correct fieldname and/or prefix. The full precision value may also be
|
||||
* stored in the document. Having the full precision value as term in a separate field with the original name,
|
||||
* sorting of query results agains such fields is possible using the original field name.
|
||||
* sorting of query results against such fields is possible using the original field name.
|
||||
*/
|
||||
public final class TrieUtils {
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 8 bit. */
|
||||
public static final TrieUtils VARIANT_8BIT=new TrieUtils(8);
|
||||
/** Instance of TrieUtils using a trie factor of 8 bit. */
|
||||
public static final TrieUtils VARIANT_8BIT=new TrieUtils(8);
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 4 bit. */
|
||||
public static final TrieUtils VARIANT_4BIT=new TrieUtils(4);
|
||||
/** Instance of TrieUtils using a trie factor of 4 bit. */
|
||||
public static final TrieUtils VARIANT_4BIT=new TrieUtils(4);
|
||||
|
||||
/** Instance of TrieUtils using a trie factor of 2 bit. */
|
||||
public static final TrieUtils VARIANT_2BIT=new TrieUtils(2);
|
||||
/** Instance of TrieUtils using a trie factor of 2 bit. */
|
||||
public static final TrieUtils VARIANT_2BIT=new TrieUtils(2);
|
||||
|
||||
/** Marker (PADDING) before lower-precision trie entries to signal the precision value. See class description! */
|
||||
public static final char TRIE_CODED_PADDING_START=(char)0x20;
|
||||
|
||||
/** The "helper" field containing the lower precision terms is the original fieldname with this appended. */
|
||||
public static final String LOWER_PRECISION_FIELD_NAME_SUFFIX="#trie";
|
||||
/** Marker (PADDING) before lower-precision trie entries to signal the precision value. See class description! */
|
||||
public static final char TRIE_CODED_PADDING_START=(char)0x20;
|
||||
|
||||
/** The "helper" field containing the lower precision terms is the original fieldname with this appended. */
|
||||
public static final String LOWER_PRECISION_FIELD_NAME_SUFFIX="#trie";
|
||||
|
||||
/** Character used as lower end */
|
||||
public static final char TRIE_CODED_SYMBOL_MIN=(char)0x100;
|
||||
/** Character used as lower end */
|
||||
public static final char TRIE_CODED_SYMBOL_MIN=(char)0x100;
|
||||
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER_AUTO=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLongAuto(val);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER_AUTO=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDoubleAuto(val);
|
||||
}
|
||||
};
|
||||
|
||||
private static TrieUtils defaultTrieVariant=TrieUtils.VARIANT_8BIT;
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER_AUTO=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLongAuto(val);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER_AUTO=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDoubleAuto(val);
|
||||
}
|
||||
};
|
||||
|
||||
private static TrieUtils defaultTrieVariant=TrieUtils.VARIANT_8BIT;
|
||||
|
||||
/**
|
||||
* Sets the default variant used for generating trie values and ranges.
|
||||
* It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
|
||||
* and can be used to get a default value through your whole application.
|
||||
*/
|
||||
public synchronized static final void setDefaultTrieVariant(final TrieUtils variant) {
|
||||
defaultTrieVariant=variant;
|
||||
}
|
||||
/**
|
||||
* Sets the default variant used for generating trie values and ranges.
|
||||
* It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
|
||||
* and can be used to get a default value through your whole application.
|
||||
*/
|
||||
public synchronized static final void setDefaultTrieVariant(final TrieUtils variant) {
|
||||
defaultTrieVariant=variant;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default variant used for generating trie values and ranges.
|
||||
* It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
|
||||
* and can be used to get a default value through your whole application.
|
||||
* <p>The default, if not set by {@link #setDefaultTrieVariant}, is {@link #VARIANT_8BIT}.
|
||||
*/
|
||||
public synchronized static final TrieUtils getDefaultTrieVariant() {
|
||||
return defaultTrieVariant;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects and returns the variant of a trie encoded string using the length.
|
||||
* @throws NumberFormatException if the length is not 8, 16, or 32 chars.
|
||||
*/
|
||||
public static final TrieUtils autoDetectVariant(final String s) {
|
||||
final int l=s.length();
|
||||
if (l==VARIANT_8BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_8BIT;
|
||||
} else if (l==VARIANT_4BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_4BIT;
|
||||
} else if (l==VARIANT_2BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_2BIT;
|
||||
} else {
|
||||
throw new NumberFormatException("Invalid trie encoded numerical value representation (incompatible length).");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Gets the default variant used for generating trie values and ranges.
|
||||
* It is used by the constructors of {@link TrieRangeQuery} and {@link TrieRangeFilter} without <code>TrieUtils</code> parameter
|
||||
* and can be used to get a default value through your whole application.
|
||||
* <p>The default, if not set by {@link #setDefaultTrieVariant}, is {@link #VARIANT_8BIT}.
|
||||
*/
|
||||
public synchronized static final TrieUtils getDefaultTrieVariant() {
|
||||
return defaultTrieVariant;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects and returns the variant of a trie encoded string using the length.
|
||||
* @throws NumberFormatException if the length is not 8, 16, or 32 chars.
|
||||
*/
|
||||
public static final TrieUtils autoDetectVariant(final String s) {
|
||||
final int l=s.length();
|
||||
if (l==VARIANT_8BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_8BIT;
|
||||
} else if (l==VARIANT_4BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_4BIT;
|
||||
} else if (l==VARIANT_2BIT.TRIE_CODED_LENGTH) {
|
||||
return VARIANT_2BIT;
|
||||
} else {
|
||||
throw new NumberFormatException("Invalid trie encoded numerical value representation (incompatible length).");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>long</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final long trieCodedToLongAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToLong(s);
|
||||
}
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>long</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final long trieCodedToLongAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToLong(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>double</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final double trieCodedToDoubleAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDouble(s);
|
||||
}
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>double</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final double trieCodedToDoubleAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDouble(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>Date</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final Date trieCodedToDateAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDate(s);
|
||||
}
|
||||
/**
|
||||
* Converts a encoded <code>String</code> value back to a <code>Date</code>,
|
||||
* auto detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final Date trieCodedToDateAuto(final String s) {
|
||||
return autoDetectVariant(s).trieCodedToDate(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
|
||||
* automatically detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final SortField getSortFieldAuto(final String field) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO);
|
||||
}
|
||||
|
||||
/**
|
||||
* A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
|
||||
* automatically detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final SortField getSortFieldAuto(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO, reverse);
|
||||
}
|
||||
|
||||
// TrieUtils instance's part
|
||||
|
||||
private TrieUtils(int bits) {
|
||||
assert 64%bits == 0;
|
||||
|
||||
// helper variable for conversion
|
||||
mask = (1L << bits) - 1L;
|
||||
/**
|
||||
* A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
|
||||
* automatically detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final SortField getSortFieldAuto(final String field) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO);
|
||||
}
|
||||
|
||||
/**
|
||||
* A factory method, that generates a {@link SortField} instance for sorting trie encoded values,
|
||||
* automatically detecting the trie encoding variant using the String length.
|
||||
*/
|
||||
public static final SortField getSortFieldAuto(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER_AUTO, reverse);
|
||||
}
|
||||
|
||||
// TrieUtils instance's part
|
||||
|
||||
private TrieUtils(int bits) {
|
||||
assert 64%bits == 0;
|
||||
|
||||
// helper variable for conversion
|
||||
mask = (1L << bits) - 1L;
|
||||
|
||||
// init global "constants"
|
||||
TRIE_BITS=bits;
|
||||
TRIE_CODED_LENGTH=64/TRIE_BITS;
|
||||
TRIE_CODED_SYMBOL_MAX=(char)(TRIE_CODED_SYMBOL_MIN+mask);
|
||||
TRIE_CODED_NUMERIC_MIN=longToTrieCoded(Long.MIN_VALUE);
|
||||
TRIE_CODED_NUMERIC_MAX=longToTrieCoded(Long.MAX_VALUE);
|
||||
}
|
||||
// init global "constants"
|
||||
TRIE_BITS=bits;
|
||||
TRIE_CODED_LENGTH=64/TRIE_BITS;
|
||||
TRIE_CODED_SYMBOL_MAX=(char)(TRIE_CODED_SYMBOL_MIN+mask);
|
||||
TRIE_CODED_NUMERIC_MIN=longToTrieCoded(Long.MIN_VALUE);
|
||||
TRIE_CODED_NUMERIC_MAX=longToTrieCoded(Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
// internal conversion to/from strings
|
||||
// internal conversion to/from strings
|
||||
|
||||
private final String internalLongToTrieCoded(long l) {
|
||||
final char[] buf=new char[TRIE_CODED_LENGTH];
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>=0; i--) {
|
||||
buf[i] = (char)( TRIE_CODED_SYMBOL_MIN + (l & mask) );
|
||||
l = l >>> TRIE_BITS;
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
private final String internalLongToTrieCoded(long l) {
|
||||
final char[] buf=new char[TRIE_CODED_LENGTH];
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>=0; i--) {
|
||||
buf[i] = (char)( TRIE_CODED_SYMBOL_MIN + (l & mask) );
|
||||
l = l >>> TRIE_BITS;
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
private final long internalTrieCodedToLong(final String s) {
|
||||
if (s==null) throw new NullPointerException("Trie encoded string may not be NULL");
|
||||
final int len=s.length();
|
||||
if (len!=TRIE_CODED_LENGTH) throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (incompatible length, must be "+TRIE_CODED_LENGTH+")"
|
||||
);
|
||||
long l=0L;
|
||||
for (int i=0; i<len; i++) {
|
||||
char ch=s.charAt(i);
|
||||
if (ch>=TRIE_CODED_SYMBOL_MIN && ch<=TRIE_CODED_SYMBOL_MAX) {
|
||||
l = (l << TRIE_BITS) | (long)(ch-TRIE_CODED_SYMBOL_MIN);
|
||||
} else {
|
||||
throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
||||
private final long internalTrieCodedToLong(final String s) {
|
||||
if (s==null) throw new NullPointerException("Trie encoded string may not be NULL");
|
||||
final int len=s.length();
|
||||
if (len!=TRIE_CODED_LENGTH) throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (incompatible length, must be "+TRIE_CODED_LENGTH+")"
|
||||
);
|
||||
long l=0L;
|
||||
for (int i=0; i<len; i++) {
|
||||
char ch=s.charAt(i);
|
||||
if (ch>=TRIE_CODED_SYMBOL_MIN && ch<=TRIE_CODED_SYMBOL_MAX) {
|
||||
l = (l << TRIE_BITS) | (long)(ch-TRIE_CODED_SYMBOL_MIN);
|
||||
} else {
|
||||
throw new NumberFormatException(
|
||||
"Invalid trie encoded numerical value representation (char "+
|
||||
Integer.toHexString((int)ch)+" at position "+i+" is invalid)"
|
||||
);
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
// Long's
|
||||
// Long's
|
||||
|
||||
/** Converts a <code>long</code> value encoded to a <code>String</code>. */
|
||||
public String longToTrieCoded(final long l) {
|
||||
return internalLongToTrieCoded(l ^ 0x8000000000000000L);
|
||||
}
|
||||
/** Converts a <code>long</code> value encoded to a <code>String</code>. */
|
||||
public String longToTrieCoded(final long l) {
|
||||
return internalLongToTrieCoded(l ^ 0x8000000000000000L);
|
||||
}
|
||||
|
||||
/** Converts a encoded <code>String</code> value back to a <code>long</code>. */
|
||||
public long trieCodedToLong(final String s) {
|
||||
return internalTrieCodedToLong(s) ^ 0x8000000000000000L;
|
||||
}
|
||||
/** Converts a encoded <code>String</code> value back to a <code>long</code>. */
|
||||
public long trieCodedToLong(final String s) {
|
||||
return internalTrieCodedToLong(s) ^ 0x8000000000000000L;
|
||||
}
|
||||
|
||||
// Double's
|
||||
// Double's
|
||||
|
||||
/** Converts a <code>double</code> value encoded to a <code>String</code>. */
|
||||
public String doubleToTrieCoded(final double d) {
|
||||
long l=Double.doubleToLongBits(d);
|
||||
if ((l & 0x8000000000000000L) == 0L) {
|
||||
// >0
|
||||
l |= 0x8000000000000000L;
|
||||
} else {
|
||||
// <0
|
||||
l = ~l;
|
||||
}
|
||||
return internalLongToTrieCoded(l);
|
||||
}
|
||||
/** Converts a <code>double</code> value encoded to a <code>String</code>. */
|
||||
public String doubleToTrieCoded(final double d) {
|
||||
long l=Double.doubleToLongBits(d);
|
||||
if ((l & 0x8000000000000000L) == 0L) {
|
||||
// >0
|
||||
l |= 0x8000000000000000L;
|
||||
} else {
|
||||
// <0
|
||||
l = ~l;
|
||||
}
|
||||
return internalLongToTrieCoded(l);
|
||||
}
|
||||
|
||||
/** Converts a encoded <code>String</code> value back to a <code>double</code>. */
|
||||
public double trieCodedToDouble(final String s) {
|
||||
long l=internalTrieCodedToLong(s);
|
||||
if ((l & 0x8000000000000000L) != 0L) {
|
||||
// >0
|
||||
l &= 0x7fffffffffffffffL;
|
||||
} else {
|
||||
// <0
|
||||
l = ~l;
|
||||
}
|
||||
return Double.longBitsToDouble(l);
|
||||
}
|
||||
/** Converts a encoded <code>String</code> value back to a <code>double</code>. */
|
||||
public double trieCodedToDouble(final String s) {
|
||||
long l=internalTrieCodedToLong(s);
|
||||
if ((l & 0x8000000000000000L) != 0L) {
|
||||
// >0
|
||||
l &= 0x7fffffffffffffffL;
|
||||
} else {
|
||||
// <0
|
||||
l = ~l;
|
||||
}
|
||||
return Double.longBitsToDouble(l);
|
||||
}
|
||||
|
||||
// Date's
|
||||
// Date's
|
||||
|
||||
/** Converts a <code>Date</code> value encoded to a <code>String</code>. */
|
||||
public String dateToTrieCoded(final Date d) {
|
||||
return longToTrieCoded(d.getTime());
|
||||
}
|
||||
/** Converts a <code>Date</code> value encoded to a <code>String</code>. */
|
||||
public String dateToTrieCoded(final Date d) {
|
||||
return longToTrieCoded(d.getTime());
|
||||
}
|
||||
|
||||
/** Converts a encoded <code>String</code> value back to a <code>Date</code>. */
|
||||
public Date trieCodedToDate(final String s) {
|
||||
return new Date(trieCodedToLong(s));
|
||||
}
|
||||
/** Converts a encoded <code>String</code> value back to a <code>Date</code>. */
|
||||
public Date trieCodedToDate(final String s) {
|
||||
return new Date(trieCodedToLong(s));
|
||||
}
|
||||
|
||||
// increment / decrement
|
||||
// increment / decrement
|
||||
|
||||
/** Increments an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String incrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean inc=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (inc) b++;
|
||||
if (inc=(b>(int)mask)) b=0;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
/** Increments an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String incrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean inc=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (inc) b++;
|
||||
if (inc=(b>(int)mask)) b=0;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
/** Decrements an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String decrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean dec=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (dec) b--;
|
||||
if (dec=(b<0)) b=(int)mask;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
/** Decrements an encoded String value by 1. Needed by {@link TrieRangeFilter}. */
|
||||
public String decrementTrieCoded(final String v) {
|
||||
final int l=v.length();
|
||||
final char[] buf=new char[l];
|
||||
boolean dec=true;
|
||||
for (int i=l-1; i>=0; i--) {
|
||||
int b=v.charAt(i)-TRIE_CODED_SYMBOL_MIN;
|
||||
if (dec) b--;
|
||||
if (dec=(b<0)) b=(int)mask;
|
||||
buf[i]=(char)(TRIE_CODED_SYMBOL_MIN+b);
|
||||
}
|
||||
return new String(buf);
|
||||
}
|
||||
|
||||
private void addConvertedTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final String val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
Field f=new Field(fieldname, val, store, index?Field.Index.NOT_ANALYZED:Field.Index.NO);
|
||||
if (index) {
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
// add the lower precision values in the helper field with prefix
|
||||
final StringBuffer sb=new StringBuffer(TRIE_CODED_LENGTH);
|
||||
synchronized(sb) {
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>0; i--) {
|
||||
sb.setLength(0);
|
||||
f=new Field(
|
||||
fieldname + LOWER_PRECISION_FIELD_NAME_SUFFIX,
|
||||
sb.append( (char)(TRIE_CODED_PADDING_START+i) ).append( val.substring(0,i) ).toString(),
|
||||
Field.Store.NO, Field.Index.NOT_ANALYZED
|
||||
);
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ldoc.add(f);
|
||||
}
|
||||
}
|
||||
private void addConvertedTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final String val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
Field f=new Field(fieldname, val, store, index?Field.Index.NOT_ANALYZED:Field.Index.NO);
|
||||
if (index) {
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
// add the lower precision values in the helper field with prefix
|
||||
final StringBuffer sb=new StringBuffer(TRIE_CODED_LENGTH);
|
||||
synchronized(sb) {
|
||||
for (int i=TRIE_CODED_LENGTH-1; i>0; i--) {
|
||||
sb.setLength(0);
|
||||
f=new Field(
|
||||
fieldname + LOWER_PRECISION_FIELD_NAME_SUFFIX,
|
||||
sb.append( (char)(TRIE_CODED_PADDING_START+i) ).append( val.substring(0,i) ).toString(),
|
||||
Field.Store.NO, Field.Index.NOT_ANALYZED
|
||||
);
|
||||
f.setOmitTf(true);
|
||||
ldoc.add(f);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ldoc.add(f);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a double value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addDoubleTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final double val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, doubleToTrieCoded(val), index, store);
|
||||
}
|
||||
/**
|
||||
* Stores a double value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addDoubleTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname, final double val,
|
||||
final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, doubleToTrieCoded(val), index, store);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a Date value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addDateTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final Date val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, dateToTrieCoded(val), index, store);
|
||||
}
|
||||
/**
|
||||
* Stores a Date value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addDateTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final Date val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, dateToTrieCoded(val), index, store);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a long value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addLongTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final long val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, longToTrieCoded(val), index, store);
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER);
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse);
|
||||
}
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs. */
|
||||
public final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLong(val);
|
||||
}
|
||||
};
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles. */
|
||||
public final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDouble(val);
|
||||
}
|
||||
};
|
||||
|
||||
private final long mask;
|
||||
|
||||
/** Number of bits used in this trie variant (2, 4, or 8) */
|
||||
public final int TRIE_BITS;
|
||||
/**
|
||||
* Stores a long value in trie-form in document for indexing.
|
||||
* <p>To store the different precisions of the long values (from one byte [only the most significant one] to the full eight bytes),
|
||||
* each lower precision is prefixed by the length ({@link #TRIE_CODED_PADDING_START}<code>+precision</code>),
|
||||
* in an extra "helper" field with a name of <code>fieldname+{@link #LOWER_PRECISION_FIELD_NAME_SUFFIX}</code>
|
||||
* (i.e. fieldname "numeric" => lower precision's name "numeric#trie").
|
||||
* The full long is not prefixed at all and indexed and stored according to the given flags in the original field name.
|
||||
* If the field should not be searchable, set <code>index</code> to <code>false</code>. It is then only stored (for convenience).
|
||||
* Fields added to a document using this method can be queried by {@link TrieRangeQuery}.
|
||||
*/
|
||||
public void addLongTrieCodedDocumentField(
|
||||
final Document ldoc, final String fieldname,
|
||||
final long val, final boolean index, final Field.Store store
|
||||
) {
|
||||
addConvertedTrieCodedDocumentField(ldoc, fieldname, longToTrieCoded(val), index, store);
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER);
|
||||
}
|
||||
|
||||
/** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */
|
||||
public SortField getSortField(final String field, boolean reverse) {
|
||||
return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse);
|
||||
}
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs. */
|
||||
public final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){
|
||||
public final long parseLong(String val) {
|
||||
return trieCodedToLong(val);
|
||||
}
|
||||
};
|
||||
|
||||
/** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles. */
|
||||
public final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){
|
||||
public final double parseDouble(String val) {
|
||||
return trieCodedToDouble(val);
|
||||
}
|
||||
};
|
||||
|
||||
private final long mask;
|
||||
|
||||
/** Number of bits used in this trie variant (2, 4, or 8) */
|
||||
public final int TRIE_BITS;
|
||||
|
||||
/** Length (in chars) of an encoded value (8, 16, or 32 chars) */
|
||||
public final int TRIE_CODED_LENGTH;
|
||||
/** Length (in chars) of an encoded value (8, 16, or 32 chars) */
|
||||
public final int TRIE_CODED_LENGTH;
|
||||
|
||||
/** Character used as upper end (depends on trie bits, its <code>{@link #TRIE_CODED_SYMBOL_MIN}+2^{@link #TRIE_BITS}-1</code>) */
|
||||
public final char TRIE_CODED_SYMBOL_MAX;
|
||||
/** Character used as upper end (depends on trie bits, its <code>{@link #TRIE_CODED_SYMBOL_MIN}+2^{@link #TRIE_BITS}-1</code>) */
|
||||
public final char TRIE_CODED_SYMBOL_MAX;
|
||||
|
||||
/** minimum encoded value of a numerical index entry: {@link Long#MIN_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MIN;
|
||||
/** minimum encoded value of a numerical index entry: {@link Long#MIN_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MIN;
|
||||
|
||||
/** maximum encoded value of a numerical index entry: {@link Long#MAX_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MAX;
|
||||
/** maximum encoded value of a numerical index entry: {@link Long#MAX_VALUE} */
|
||||
public final String TRIE_CODED_NUMERIC_MAX;
|
||||
|
||||
}
|
||||
|
|
|
@ -47,34 +47,34 @@ the values must be stored during indexing in a special format in the index (usin
|
|||
This can be done like this:</p>
|
||||
|
||||
<pre>
|
||||
Document doc = new Document();
|
||||
// add some standard fields:
|
||||
String svalue = "anything to index";
|
||||
doc.add(new Field("exampleString", svalue, Field.Store.YES, Field.Index.ANALYZED) ;
|
||||
// add some numerical fields:
|
||||
double fvalue = 1.057E17;
|
||||
TrieUtils.VARIANT_8BIT.addDoubleTrieCodedDocumentField(doc, "exampleDouble", fvalue, true /* index the field */, Field.Store.YES);
|
||||
long lvalue = 121345L;
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(doc, "exampleLong", lvalue, true /* index the field */, Field.Store.YES);
|
||||
Date dvalue = new Date(); // actual time
|
||||
TrieUtils.VARIANT_8BIT.addDateTrieCodedDocumentField(doc, "exampleDate", dvalue, true /* index the field */, Field.Store.YES);
|
||||
// add document to IndexWriter
|
||||
Document doc = new Document();
|
||||
// add some standard fields:
|
||||
String svalue = "anything to index";
|
||||
doc.add(new Field("exampleString", svalue, Field.Store.YES, Field.Index.ANALYZED) ;
|
||||
// add some numerical fields:
|
||||
double fvalue = 1.057E17;
|
||||
TrieUtils.VARIANT_8BIT.addDoubleTrieCodedDocumentField(doc, "exampleDouble", fvalue, true /* index the field */, Field.Store.YES);
|
||||
long lvalue = 121345L;
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(doc, "exampleLong", lvalue, true /* index the field */, Field.Store.YES);
|
||||
Date dvalue = new Date(); // actual time
|
||||
TrieUtils.VARIANT_8BIT.addDateTrieCodedDocumentField(doc, "exampleDate", dvalue, true /* index the field */, Field.Store.YES);
|
||||
// add document to IndexWriter
|
||||
</pre>
|
||||
|
||||
<p>The numeric index fields you prepared in this way can be searched by {@link org.apache.lucene.search.trie.TrieRangeQuery}:</p>
|
||||
|
||||
<pre>
|
||||
// Java 1.4, because Double.valueOf(double) is not available:
|
||||
Query q = new TrieRangeQuery("exampleDouble", new Double(1.0E17), new Double(2.0E17), TrieUtils.VARIANT_8BIT);
|
||||
// OR, Java 1.5, using autoboxing:
|
||||
Query q = new TrieRangeQuery("exampleDouble", 1.0E17, 2.0E17, TrieUtils.VARIANT_8BIT);
|
||||
TopDocs docs = searcher.search(q, 10);
|
||||
for (int i = 0; i<docs.scoreDocs.length; i++) {
|
||||
Document doc = searcher.doc(docs.scoreDocs[i].doc);
|
||||
System.out.println(doc.get("exampleString"));
|
||||
// decode the stored numerical value (important!!!):
|
||||
System.out.println(TrieUtils.VARIANT_8BIT.trieCodedToDouble(doc.get("exampleDouble")));
|
||||
}
|
||||
// Java 1.4, because Double.valueOf(double) is not available:
|
||||
Query q = new TrieRangeQuery("exampleDouble", new Double(1.0E17), new Double(2.0E17), TrieUtils.VARIANT_8BIT);
|
||||
// OR, Java 1.5, using autoboxing:
|
||||
Query q = new TrieRangeQuery("exampleDouble", 1.0E17, 2.0E17, TrieUtils.VARIANT_8BIT);
|
||||
TopDocs docs = searcher.search(q, 10);
|
||||
for (int i = 0; i<docs.scoreDocs.length; i++) {
|
||||
Document doc = searcher.doc(docs.scoreDocs[i].doc);
|
||||
System.out.println(doc.get("exampleString"));
|
||||
// decode the stored numerical value (important!!!):
|
||||
System.out.println(TrieUtils.VARIANT_8BIT.trieCodedToDouble(doc.get("exampleDouble")));
|
||||
}
|
||||
</pre>
|
||||
|
||||
<h3>Performance</h3>
|
||||
|
|
|
@ -34,194 +34,194 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
public class TestTrieRangeQuery extends LuceneTestCase
|
||||
{
|
||||
private static final long distance=66666;
|
||||
|
||||
private static Random rnd=new Random();
|
||||
private static RAMDirectory directory;
|
||||
private static IndexSearcher searcher;
|
||||
static {
|
||||
try {
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, MaxFieldLength.UNLIMITED);
|
||||
|
||||
// Add a series of 10000 docs with increasing long values
|
||||
for (long l=0L; l<10000L; l++) {
|
||||
Document doc=new Document();
|
||||
// add fields, that have a distance to test general functionality
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field8", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field4", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field2", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
// add ascending fields with a distance of 1 to test the correct splitting of range
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield8", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield4", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield2", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
searcher=new IndexSearcher(directory);
|
||||
} catch (Exception e) {
|
||||
throw new Error(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long lower=96666L, upper=lower + count*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be "+(2*distance), variant.trieCodedToLong(doc.get(field)), 2*distance );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((1+count)*distance), variant.trieCodedToLong(doc.get(field)), (1+count)*distance );
|
||||
}
|
||||
private static final long distance=66666;
|
||||
|
||||
private static Random rnd=new Random();
|
||||
private static RAMDirectory directory;
|
||||
private static IndexSearcher searcher;
|
||||
static {
|
||||
try {
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, MaxFieldLength.UNLIMITED);
|
||||
|
||||
// Add a series of 10000 docs with increasing long values
|
||||
for (long l=0L; l<10000L; l++) {
|
||||
Document doc=new Document();
|
||||
// add fields, that have a distance to test general functionality
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field8", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field4", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "field2", distance*l, true /*index it*/, Field.Store.YES
|
||||
);
|
||||
// add ascending fields with a distance of 1 to test the correct splitting of range
|
||||
TrieUtils.VARIANT_8BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield8", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_4BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield4", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
TrieUtils.VARIANT_2BIT.addLongTrieCodedDocumentField(
|
||||
doc, "ascfield2", l, true /*index it*/, Field.Store.NO
|
||||
);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
searcher=new IndexSearcher(directory);
|
||||
} catch (Exception e) {
|
||||
throw new Error(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long lower=96666L, upper=lower + count*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be "+(2*distance), variant.trieCodedToLong(doc.get(field)), 2*distance );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((1+count)*distance), variant.trieCodedToLong(doc.get(field)), (1+count)*distance );
|
||||
}
|
||||
|
||||
public void testRange_8bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRange_4bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRange_2bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testLeftOpenRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long upper=(count-1)*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, null, new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be 0", variant.trieCodedToLong(doc.get(field)), 0L );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((count-1)*distance), variant.trieCodedToLong(doc.get(field)), (count-1)*distance );
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_8bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_4bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_2bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRandomTrieAndClassicRangeQuery(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 50 random tests, the tests may also return 0 results, if min>max, but this is ok
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
RangeQuery cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), true, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRangeSplit(final TrieUtils variant) throws Exception {
|
||||
String field="ascfield"+variant.TRIE_BITS;
|
||||
// 50 random tests
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L);
|
||||
long upper=(long)(rnd.nextDouble()*10000L);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to inclusive range length", tTopDocs.totalHits, upper-lower+1 );
|
||||
}
|
||||
}
|
||||
public void testRange_8bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRange_4bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRange_2bit() throws Exception {
|
||||
testRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testLeftOpenRange(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
int count=3000;
|
||||
long upper=(count-1)*distance + 1234L;
|
||||
TrieRangeQuery q=new TrieRangeQuery(field, null, new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(q, null, 10000, Sort.INDEXORDER);
|
||||
System.out.println("Found "+q.getFilter().getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
assertEquals("Score docs must match "+count+" docs, found "+sd.length+" docs", sd.length, count );
|
||||
Document doc=searcher.doc(sd[0].doc);
|
||||
assertEquals("First doc should be 0", variant.trieCodedToLong(doc.get(field)), 0L );
|
||||
doc=searcher.doc(sd[sd.length-1].doc);
|
||||
assertEquals("Last doc should be "+((count-1)*distance), variant.trieCodedToLong(doc.get(field)), (count-1)*distance );
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_8bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_4bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testLeftOpenRange_2bit() throws Exception {
|
||||
testLeftOpenRange(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRandomTrieAndClassicRangeQuery(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 50 random tests, the tests may also return 0 results, if min>max, but this is ok
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
RangeQuery cq=new RangeQuery(field, variant.longToTrieCoded(lower), variant.longToTrieCoded(upper), true, true);
|
||||
cq.setConstantScoreRewrite(true);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||
assertEquals("Returned count for TrieRangeQuery and RangeQuery must be equal", tTopDocs.totalHits, cTopDocs.totalHits );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||
testRandomTrieAndClassicRangeQuery(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testRangeSplit(final TrieUtils variant) throws Exception {
|
||||
String field="ascfield"+variant.TRIE_BITS;
|
||||
// 50 random tests
|
||||
for (int i=0; i<50; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L);
|
||||
long upper=(long)(rnd.nextDouble()*10000L);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||
assertEquals("Returned count of range query must be equal to inclusive range length", tTopDocs.totalHits, upper-lower+1 );
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeSplit_8bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_4bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_2bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testSorting(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should retun descending documents
|
||||
for (int i=0; i<10; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(tq, null, 10000, new Sort(variant.getSortField(field, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=variant.trieCodedToLong(searcher.doc(sd[0].doc).get(field));
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=variant.trieCodedToLong(searcher.doc(sd[j].doc).get(field));
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
public void testRangeSplit_8bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_4bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testRangeSplit_2bit() throws Exception {
|
||||
testRangeSplit(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testSorting(final TrieUtils variant) throws Exception {
|
||||
String field="field"+variant.TRIE_BITS;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should retun descending documents
|
||||
for (int i=0; i<10; i++) {
|
||||
long lower=(long)(rnd.nextDouble()*10000L*distance);
|
||||
long upper=(long)(rnd.nextDouble()*10000L*distance);
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant);
|
||||
TopDocs topDocs = searcher.search(tq, null, 10000, new Sort(variant.getSortField(field, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=variant.trieCodedToLong(searcher.doc(sd[0].doc).get(field));
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=variant.trieCodedToLong(searcher.doc(sd[j].doc).get(field));
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -24,148 +24,148 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
public class TestTrieUtils extends LuceneTestCase {
|
||||
|
||||
public void testSpecialValues() throws Exception {
|
||||
// Variant 8bit values
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MAX, "\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(-1), "\u017f\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(0), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(1), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// Variant 4bit values
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MAX, "\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(-1), "\u0107\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(0), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(1), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// TODO: 2bit tests
|
||||
}
|
||||
public void testSpecialValues() throws Exception {
|
||||
// Variant 8bit values
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.TRIE_CODED_NUMERIC_MAX, "\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(-1), "\u017f\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff\u01ff");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(0), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_8BIT.longToTrieCoded(1), "\u0180\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// Variant 4bit values
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MIN, "\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.TRIE_CODED_NUMERIC_MAX, "\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(-1), "\u0107\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f\u010f");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(0), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100");
|
||||
assertEquals( TrieUtils.VARIANT_4BIT.longToTrieCoded(1), "\u0108\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0100\u0101");
|
||||
// TODO: 2bit tests
|
||||
}
|
||||
|
||||
private void testBinaryOrderingAndIncrement(TrieUtils variant) throws Exception {
|
||||
// generate a series of encoded longs, each numerical one bigger than the one before
|
||||
String last=null;
|
||||
for (long l=-100000L; l<100000L; l++) {
|
||||
String act=variant.longToTrieCoded(l);
|
||||
if (last!=null) {
|
||||
// test if smaller
|
||||
assertTrue( last.compareTo(act) < 0 );
|
||||
// test the increment method (the last incremented by one should be the actual)
|
||||
assertEquals( variant.incrementTrieCoded(last), act );
|
||||
// test the decrement method (the actual decremented by one should be the last)
|
||||
assertEquals( last, variant.decrementTrieCoded(act) );
|
||||
}
|
||||
// next step
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
private void testBinaryOrderingAndIncrement(TrieUtils variant) throws Exception {
|
||||
// generate a series of encoded longs, each numerical one bigger than the one before
|
||||
String last=null;
|
||||
for (long l=-100000L; l<100000L; l++) {
|
||||
String act=variant.longToTrieCoded(l);
|
||||
if (last!=null) {
|
||||
// test if smaller
|
||||
assertTrue( last.compareTo(act) < 0 );
|
||||
// test the increment method (the last incremented by one should be the actual)
|
||||
assertEquals( variant.incrementTrieCoded(last), act );
|
||||
// test the decrement method (the actual decremented by one should be the last)
|
||||
assertEquals( last, variant.decrementTrieCoded(act) );
|
||||
}
|
||||
// next step
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_8bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testBinaryOrderingAndIncrement_8bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_4bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testBinaryOrderingAndIncrement_4bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testBinaryOrderingAndIncrement_2bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testBinaryOrderingAndIncrement_2bit() throws Exception {
|
||||
testBinaryOrderingAndIncrement(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
private void testLongs(TrieUtils variant) throws Exception {
|
||||
long[] vals=new long[]{
|
||||
Long.MIN_VALUE, -5000L, -4000L, -3000L, -2000L, -1000L, 0L,
|
||||
1L, 10L, 300L, 5000L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.longToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToLong(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToLongAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
private void testLongs(TrieUtils variant) throws Exception {
|
||||
long[] vals=new long[]{
|
||||
Long.MIN_VALUE, -5000L, -4000L, -3000L, -2000L, -1000L, 0L,
|
||||
1L, 10L, 300L, 5000L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.longToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToLong(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToLongAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongs_8bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testLongs_8bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testLongs_4bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
public void testLongs_4bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testLongs_2bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
public void testLongs_2bit() throws Exception {
|
||||
testLongs(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testDoubles(TrieUtils variant) throws Exception {
|
||||
double[] vals=new double[]{
|
||||
Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0,
|
||||
+0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.doubleToTrieCoded(vals[i]);
|
||||
assertTrue( "Back and forth conversion should return same value", vals[i]==variant.trieCodedToDouble(trieVals[i]) );
|
||||
assertTrue( "Automatic back conversion with encoding detection should return same value", vals[i]==TrieUtils.trieCodedToDoubleAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
private void testDoubles(TrieUtils variant) throws Exception {
|
||||
double[] vals=new double[]{
|
||||
Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0,
|
||||
+0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.doubleToTrieCoded(vals[i]);
|
||||
assertTrue( "Back and forth conversion should return same value", vals[i]==variant.trieCodedToDouble(trieVals[i]) );
|
||||
assertTrue( "Automatic back conversion with encoding detection should return same value", vals[i]==TrieUtils.trieCodedToDoubleAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
|
||||
public void testDoubles_8bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testDoubles_8bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testDoubles_4bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
public void testDoubles_4bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testDoubles_2bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
public void testDoubles_2bit() throws Exception {
|
||||
testDoubles(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
private void testDates(TrieUtils variant) throws Exception {
|
||||
Date[] vals=new Date[]{
|
||||
new GregorianCalendar(1000,1,1).getTime(),
|
||||
new GregorianCalendar(1999,1,1).getTime(),
|
||||
new GregorianCalendar(2000,1,1).getTime(),
|
||||
new GregorianCalendar(2001,1,1).getTime()
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.dateToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToDate(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToDateAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
private void testDates(TrieUtils variant) throws Exception {
|
||||
Date[] vals=new Date[]{
|
||||
new GregorianCalendar(1000,1,1).getTime(),
|
||||
new GregorianCalendar(1999,1,1).getTime(),
|
||||
new GregorianCalendar(2000,1,1).getTime(),
|
||||
new GregorianCalendar(2001,1,1).getTime()
|
||||
};
|
||||
String[] trieVals=new String[vals.length];
|
||||
|
||||
// check back and forth conversion
|
||||
for (int i=0; i<vals.length; i++) {
|
||||
trieVals[i]=variant.dateToTrieCoded(vals[i]);
|
||||
assertEquals( "Back and forth conversion should return same value", vals[i], variant.trieCodedToDate(trieVals[i]) );
|
||||
assertEquals( "Automatic back conversion with encoding detection should return same value", vals[i], TrieUtils.trieCodedToDateAuto(trieVals[i]) );
|
||||
}
|
||||
|
||||
// check sort order (trieVals should be ascending)
|
||||
for (int i=1; i<vals.length; i++) {
|
||||
assertTrue( trieVals[i-1].compareTo( trieVals[i] ) < 0 );
|
||||
}
|
||||
}
|
||||
|
||||
public void testDates_8bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
public void testDates_8bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_8BIT);
|
||||
}
|
||||
|
||||
public void testDates_4bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
public void testDates_4bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_4BIT);
|
||||
}
|
||||
|
||||
public void testDates_2bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
public void testDates_2bit() throws Exception {
|
||||
testDates(TrieUtils.VARIANT_2BIT);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue