mirror of https://github.com/apache/lucene.git
LUCENE-1712: Set default precisionStep for NumericField and NumericRange*
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@793823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
91aedd6685
commit
b2a4bc68ad
|
@ -516,7 +516,7 @@ New features
|
||||||
See the Javadocs for NGramDistance.java for a reference paper on why
|
See the Javadocs for NGramDistance.java for a reference paper on why
|
||||||
this is helpful (Tom Morton via Grant Ingersoll)
|
this is helpful (Tom Morton via Grant Ingersoll)
|
||||||
|
|
||||||
27. LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673, LUCENE-1701:
|
27. LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673, LUCENE-1701, LUCENE-1712:
|
||||||
Added NumericRangeQuery and NumericRangeFilter, a fast alternative to
|
Added NumericRangeQuery and NumericRangeFilter, a fast alternative to
|
||||||
RangeQuery/RangeFilter for numeric searches. They depend on a specific
|
RangeQuery/RangeFilter for numeric searches. They depend on a specific
|
||||||
structure of terms in the index that can be created by indexing
|
structure of terms in the index that can be created by indexing
|
||||||
|
|
|
@ -77,7 +77,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
*
|
*
|
||||||
* <p>Values indexed by this stream can be loaded into the {@link FieldCache}
|
* <p>Values indexed by this stream can be loaded into the {@link FieldCache}
|
||||||
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
|
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
|
||||||
* type; {@link SortField#AUTO} does not work with this type of field)
|
* type; {@link SortField#AUTO} does not work with this type of field).
|
||||||
|
* Values solely used for sorting can be indexed using a <code>precisionStep</code>
|
||||||
|
* of {@link Integer#MAX_VALUE} (at least ≥64), because this step only produces
|
||||||
|
* one value token with highest precision.
|
||||||
*
|
*
|
||||||
* <p><font color="red"><b>NOTE:</b> This API is experimental and
|
* <p><font color="red"><b>NOTE:</b> This API is experimental and
|
||||||
* might change in incompatible ways in the next release.</font>
|
* might change in incompatible ways in the next release.</font>
|
||||||
|
@ -86,24 +89,30 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
*/
|
*/
|
||||||
public final class NumericTokenStream extends TokenStream {
|
public final class NumericTokenStream extends TokenStream {
|
||||||
|
|
||||||
/** The full precision 64 bit token gets this token type assigned. */
|
/** The full precision token gets this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_FULL_PREC_64 = "fullPrecNumeric64";
|
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
|
||||||
|
|
||||||
/** The lower precision 64 bit tokens get this token type assigned. */
|
/** The lower precision tokens get this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_LOWER_PREC_64 = "lowerPrecNumeric64";
|
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
|
||||||
|
|
||||||
/** The full precision 32 bit token gets this token type assigned. */
|
|
||||||
public static final String TOKEN_TYPE_FULL_PREC_32 = "fullPrecNumeric32";
|
|
||||||
|
|
||||||
/** The lower precision 32 bit tokens gets this token type assigned. */
|
|
||||||
public static final String TOKEN_TYPE_LOWER_PREC_32 = "lowerPrecNumeric32";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a token stream for numeric values. The stream is not yet initialized,
|
* Creates a token stream for numeric values using the default <code>precisionStep</code>
|
||||||
|
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
|
||||||
|
* before using set a value using the various set<em>???</em>Value() methods.
|
||||||
|
*/
|
||||||
|
public NumericTokenStream() {
|
||||||
|
this(NumericUtils.PRECISION_STEP_DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a token stream for numeric values with the specified
|
||||||
|
* <code>precisionStep</code>. The stream is not yet initialized,
|
||||||
* before using set a value using the various set<em>???</em>Value() methods.
|
* before using set a value using the various set<em>???</em>Value() methods.
|
||||||
*/
|
*/
|
||||||
public NumericTokenStream(final int precisionStep) {
|
public NumericTokenStream(final int precisionStep) {
|
||||||
this.precisionStep = precisionStep;
|
this.precisionStep = precisionStep;
|
||||||
|
if (precisionStep < 1)
|
||||||
|
throw new IllegalArgumentException("precisionStep must be >=1");
|
||||||
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||||
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
||||||
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
|
@ -165,8 +174,6 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
public void reset() {
|
public void reset() {
|
||||||
if (valSize == 0)
|
if (valSize == 0)
|
||||||
throw new IllegalStateException("call set???Value() before usage");
|
throw new IllegalStateException("call set???Value() before usage");
|
||||||
if (precisionStep < 1 || precisionStep > valSize)
|
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
|
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,15 +187,13 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
final char[] buffer;
|
final char[] buffer;
|
||||||
switch (valSize) {
|
switch (valSize) {
|
||||||
case 64:
|
case 64:
|
||||||
buffer = termAtt.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
|
buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
|
||||||
termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
|
termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
|
||||||
typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 32:
|
case 32:
|
||||||
buffer = termAtt.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
|
buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
|
||||||
termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
|
termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
|
||||||
typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -196,6 +201,7 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
throw new IllegalArgumentException("valSize must be 32 or 64");
|
throw new IllegalArgumentException("valSize must be 32 or 64");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
|
||||||
posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
|
posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return true;
|
return true;
|
||||||
|
@ -215,15 +221,13 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
final char[] buffer;
|
final char[] buffer;
|
||||||
switch (valSize) {
|
switch (valSize) {
|
||||||
case 64:
|
case 64:
|
||||||
buffer = reusableToken.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
|
buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
|
||||||
reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
|
reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
|
||||||
reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 32:
|
case 32:
|
||||||
buffer = reusableToken.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
|
buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
|
||||||
reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
|
reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
|
||||||
reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -231,6 +235,7 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
throw new IllegalArgumentException("valSize must be 32 or 64");
|
throw new IllegalArgumentException("valSize must be 32 or 64");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
|
||||||
reusableToken.setPositionIncrement((shift == 0) ? 1 : 0);
|
reusableToken.setPositionIncrement((shift == 0) ? 1 : 0);
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return reusableToken;
|
return reusableToken;
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.NumericTokenStream;
|
import org.apache.lucene.analysis.NumericTokenStream;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
||||||
import org.apache.lucene.search.NumericRangeFilter; // javadocs
|
import org.apache.lucene.search.NumericRangeFilter; // javadocs
|
||||||
import org.apache.lucene.search.SortField; // javadocs
|
import org.apache.lucene.search.SortField; // javadocs
|
||||||
|
@ -72,7 +73,10 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
||||||
*
|
*
|
||||||
* <p>Values indexed by this field can be loaded into the {@link FieldCache}
|
* <p>Values indexed by this field can be loaded into the {@link FieldCache}
|
||||||
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
|
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
|
||||||
* type; {@link SortField#AUTO} does not work with this type of field)
|
* type; {@link SortField#AUTO} does not work with this type of field).
|
||||||
|
* Values solely used for sorting can be indexed using a <code>precisionStep</code>
|
||||||
|
* of {@link Integer#MAX_VALUE} (at least ≥64), because this step only produces
|
||||||
|
* one value token with highest precision.
|
||||||
*
|
*
|
||||||
* <p><font color="red"><b>NOTE:</b> This API is experimental and
|
* <p><font color="red"><b>NOTE:</b> This API is experimental and
|
||||||
* might change in incompatible ways in the next release.</font>
|
* might change in incompatible ways in the next release.</font>
|
||||||
|
@ -84,7 +88,34 @@ public final class NumericField extends AbstractField {
|
||||||
private final NumericTokenStream tokenStream;
|
private final NumericTokenStream tokenStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a field for numeric values. The instance is not yet initialized with
|
* Creates a field for numeric values using the default <code>precisionStep</code>
|
||||||
|
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
|
||||||
|
* a numeric value, before indexing a document containing this field,
|
||||||
|
* set a value using the various set<em>???</em>Value() methods.
|
||||||
|
* This constructor creates an indexed, but not stored field.
|
||||||
|
* @param name the field name
|
||||||
|
*/
|
||||||
|
public NumericField(String name) {
|
||||||
|
this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a field for numeric values using the default <code>precisionStep</code>
|
||||||
|
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
|
||||||
|
* a numeric value, before indexing a document containing this field,
|
||||||
|
* set a value using the various set<em>???</em>Value() methods.
|
||||||
|
* @param name the field name
|
||||||
|
* @param store if the field should be stored in plain text form
|
||||||
|
* (according to <code>toString(value)</code> of the used data type)
|
||||||
|
* @param index if the field should be indexed using {@link NumericTokenStream}
|
||||||
|
*/
|
||||||
|
public NumericField(String name, Field.Store store, boolean index) {
|
||||||
|
this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a field for numeric values with the specified
|
||||||
|
* <code>precisionStep</code>. The instance is not yet initialized with
|
||||||
* a numeric value, before indexing a document containing this field,
|
* a numeric value, before indexing a document containing this field,
|
||||||
* set a value using the various set<em>???</em>Value() methods.
|
* set a value using the various set<em>???</em>Value() methods.
|
||||||
* This constructor creates an indexed, but not stored field.
|
* This constructor creates an indexed, but not stored field.
|
||||||
|
@ -96,7 +127,8 @@ public final class NumericField extends AbstractField {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a field for numeric values. The instance is not yet initialized with
|
* Creates a field for numeric values with the specified
|
||||||
|
* <code>precisionStep</code>. The instance is not yet initialized with
|
||||||
* a numeric value, before indexing a document containing this field,
|
* a numeric value, before indexing a document containing this field,
|
||||||
* set a value using the various set<em>???</em>Value() methods.
|
* set a value using the various set<em>???</em>Value() methods.
|
||||||
* @param name the field name
|
* @param name the field name
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
|
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
|
||||||
import org.apache.lucene.document.NumericField; // for javadocs
|
import org.apache.lucene.document.NumericField; // for javadocs
|
||||||
|
import org.apache.lucene.util.NumericUtils; // for javadocs
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implementation of a {@link Filter} that implements <em>trie-based</em> range filtering
|
* Implementation of a {@link Filter} that implements <em>trie-based</em> range filtering
|
||||||
|
@ -64,6 +65,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>long</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeFilter newLongRange(final String field,
|
||||||
|
Long min, Long max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeFilter(
|
||||||
|
NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>int</code>
|
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>int</code>
|
||||||
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -79,6 +95,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>int</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeFilter newIntRange(final String field,
|
||||||
|
Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeFilter(
|
||||||
|
NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>double</code>
|
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>double</code>
|
||||||
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -94,6 +125,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>double</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeFilter newDoubleRange(final String field,
|
||||||
|
Double min, Double max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeFilter(
|
||||||
|
NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>float</code>
|
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>float</code>
|
||||||
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -109,6 +155,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>float</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeFilter newFloatRange(final String field,
|
||||||
|
Float min, Float max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeFilter(
|
||||||
|
NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns the field name for this filter */
|
/** Returns the field name for this filter */
|
||||||
public String getField() { return ((NumericRangeQuery)query).getField(); }
|
public String getField() { return ((NumericRangeQuery)query).getField(); }
|
||||||
|
|
||||||
|
|
|
@ -38,8 +38,9 @@ import org.apache.lucene.index.Term;
|
||||||
* An important setting is the <a href="#precisionStepDesc"><code>precisionStep</code></a>, which specifies,
|
* An important setting is the <a href="#precisionStepDesc"><code>precisionStep</code></a>, which specifies,
|
||||||
* how many different precisions per numeric value are indexed to speed up range queries.
|
* how many different precisions per numeric value are indexed to speed up range queries.
|
||||||
* Lower values create more terms but speed up search, higher values create less terms, but
|
* Lower values create more terms but speed up search, higher values create less terms, but
|
||||||
* slow down search. Suitable values are 2, 4, or 8. A good starting point to test is 4.
|
* slow down search. Suitable values are between <b>1</b> and <b>8</b>. A good starting point to test is <b>4</b>,
|
||||||
* For code examples see {@link NumericField}.
|
* which is the default value for all <code>Numeric*</code> classes. For a discussion about ideal
|
||||||
|
* values, see below. Indexing code examples can be found in {@link NumericField}.
|
||||||
*
|
*
|
||||||
* <h4>Searching</h4>
|
* <h4>Searching</h4>
|
||||||
* <p>This class has no constructor, you can create queries depending on the data type
|
* <p>This class has no constructor, you can create queries depending on the data type
|
||||||
|
@ -51,6 +52,8 @@ import org.apache.lucene.index.Term;
|
||||||
* new Float(0.3f), new Float(0.10f),
|
* new Float(0.3f), new Float(0.10f),
|
||||||
* true, true);
|
* true, true);
|
||||||
* </pre>
|
* </pre>
|
||||||
|
* The used <a href="#precisionStepDesc"><code>precisionStep</code></a> must be compatible
|
||||||
|
* to the one used during indexing (see below). The default is also <b>4</b>.
|
||||||
*
|
*
|
||||||
* <h3>How it works</h3>
|
* <h3>How it works</h3>
|
||||||
*
|
*
|
||||||
|
@ -101,18 +104,31 @@ import org.apache.lucene.index.Term;
|
||||||
* be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
|
* be found out by testing. <b>Important:</b> You can index with a lower precision step value and test search speed
|
||||||
* using a multiple of the original step value.</p>
|
* using a multiple of the original step value.</p>
|
||||||
*
|
*
|
||||||
|
* <p>Good values for <code>precisionStep</code> depend on usage and data type:
|
||||||
|
* <ul>
|
||||||
|
* <li>The default for all data types is <b>4</b>, which is used, when no <code>precisionStep</code> is given.
|
||||||
|
* <li>Ideal value in most cases for <em>64 bit</em> data types <em>(long, double)</em> is <b>6</b> or <b>8</b>.
|
||||||
|
* <li>Ideal value in most cases for <em>32 bit</em> data types <em>(int, float)</em> is <b>4</b>.
|
||||||
|
* <li>Steps <b>≥64</b> for <em>long/double</em> and <b>≥32</b> for <em>int/float</em> produce one token
|
||||||
|
* per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used
|
||||||
|
* to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as
|
||||||
|
* <code>precisionStep</code>). Using {@link NumericField NumericFields} for sorting
|
||||||
|
* is ideal, because building the field cache is much faster than with text-only numbers.
|
||||||
|
* Sorting is also possible with range query optimized fields using one of the above <code>precisionSteps</code>.
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
* <p>This dramatically improves the performance of Apache Lucene with range queries, which
|
* <p>This dramatically improves the performance of Apache Lucene with range queries, which
|
||||||
* are no longer dependent on the index size and the number of distinct values because there is
|
* are no longer dependent on the index size and the number of distinct values because there is
|
||||||
* an upper limit unrelated to either of these properties.</p>
|
* an upper limit unrelated to either of these properties.</p>
|
||||||
*
|
*
|
||||||
* <p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
|
* <p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
|
||||||
* that the old {@link RangeQuery} (with raised {@link BooleanQuery} clause count) took about 30-40
|
* that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
|
||||||
* secs to complete, {@link ConstantScoreRangeQuery} took 5 secs and executing
|
* took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs
|
||||||
* this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
|
* and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
|
||||||
* This query type was developed for a geographic portal, where the performance for
|
* precision step). This query type was developed for a geographic portal, where the performance for
|
||||||
* e.g. bounding boxes or exact date/time stamps is important.</p>
|
* e.g. bounding boxes or exact date/time stamps is important.</p>
|
||||||
*
|
*
|
||||||
* <p>The query is in {@linkplain #setConstantScoreRewrite constant score mode} per default.
|
* <p>The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}.
|
||||||
* With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery}
|
* With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery}
|
||||||
* rewrite mode without changing the max clause count.
|
* rewrite mode without changing the max clause count.
|
||||||
*
|
*
|
||||||
|
@ -127,8 +143,8 @@ public final class NumericRangeQuery extends MultiTermQuery {
|
||||||
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
|
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
|
||||||
) {
|
) {
|
||||||
assert (valSize == 32 || valSize == 64);
|
assert (valSize == 32 || valSize == 64);
|
||||||
if (precisionStep < 1 || precisionStep > valSize)
|
if (precisionStep < 1)
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
|
throw new IllegalArgumentException("precisionStep must be >=1");
|
||||||
this.field = field.intern();
|
this.field = field.intern();
|
||||||
this.precisionStep = precisionStep;
|
this.precisionStep = precisionStep;
|
||||||
this.valSize = valSize;
|
this.valSize = valSize;
|
||||||
|
@ -152,6 +168,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
|
||||||
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
|
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>long</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeQuery newLongRange(final String field,
|
||||||
|
Long min, Long max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
|
||||||
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -165,6 +194,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
|
||||||
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
|
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>int</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeQuery newIntRange(final String field,
|
||||||
|
Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
|
||||||
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -178,6 +220,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
|
||||||
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
|
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeQuery newDoubleRange(final String field,
|
||||||
|
Double min, Double max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
|
||||||
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
* range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>.
|
||||||
|
@ -191,6 +246,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
|
||||||
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
|
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
|
||||||
|
* range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
|
||||||
|
* You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
|
||||||
|
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||||
|
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||||
|
*/
|
||||||
|
public static NumericRangeQuery newFloatRange(final String field,
|
||||||
|
Float min, Float max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
//@Override
|
//@Override
|
||||||
protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
|
protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
|
||||||
return new NumericRangeTermEnum(reader);
|
return new NumericRangeTermEnum(reader);
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
|
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
|
||||||
|
import org.apache.lucene.document.NumericField; // for javadocs
|
||||||
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
|
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
|
||||||
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
|
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
|
||||||
|
|
||||||
|
@ -62,9 +63,15 @@ import org.apache.lucene.search.NumericRangeFilter; // for javadocs
|
||||||
public final class NumericUtils {
|
public final class NumericUtils {
|
||||||
|
|
||||||
private NumericUtils() {} // no instance!
|
private NumericUtils() {} // no instance!
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Longs are stored at lower precision by shifting off lower bits. The shift count is
|
* The default precision step used by {@link NumericField}, {@link NumericTokenStream},
|
||||||
|
* {@link NumericRangeQuery}, and {@link NumericRangeFilter}.
|
||||||
|
*/
|
||||||
|
public static final int PRECISION_STEP_DEFAULT = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is
|
||||||
* stored as <code>SHIFT_START_LONG+shift</code> in the first character
|
* stored as <code>SHIFT_START_LONG+shift</code> in the first character
|
||||||
*/
|
*/
|
||||||
public static final char SHIFT_START_LONG = (char)0x20;
|
public static final char SHIFT_START_LONG = (char)0x20;
|
||||||
|
@ -74,10 +81,10 @@ public final class NumericUtils {
|
||||||
* for encoding <code>long</code> values.
|
* for encoding <code>long</code> values.
|
||||||
* @see #longToPrefixCoded(long,int,char[])
|
* @see #longToPrefixCoded(long,int,char[])
|
||||||
*/
|
*/
|
||||||
public static final int LONG_BUF_SIZE = 63/7 + 2;
|
public static final int BUF_SIZE_LONG = 63/7 + 2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Integers are stored at lower precision by shifting off lower bits. The shift count is
|
* Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is
|
||||||
* stored as <code>SHIFT_START_INT+shift</code> in the first character
|
* stored as <code>SHIFT_START_INT+shift</code> in the first character
|
||||||
*/
|
*/
|
||||||
public static final char SHIFT_START_INT = (char)0x60;
|
public static final char SHIFT_START_INT = (char)0x60;
|
||||||
|
@ -87,14 +94,14 @@ public final class NumericUtils {
|
||||||
* for encoding <code>int</code> values.
|
* for encoding <code>int</code> values.
|
||||||
* @see #intToPrefixCoded(int,int,char[])
|
* @see #intToPrefixCoded(int,int,char[])
|
||||||
*/
|
*/
|
||||||
public static final int INT_BUF_SIZE = 31/7 + 2;
|
public static final int BUF_SIZE_INT = 31/7 + 2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
|
* Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
|
||||||
* This method is used by {@link NumericTokenStream}.
|
* This method is used by {@link NumericTokenStream}.
|
||||||
* @param val the numeric value
|
* @param val the numeric value
|
||||||
* @param shift how many bits to strip from the right
|
* @param shift how many bits to strip from the right
|
||||||
* @param buffer that will contain the encoded chars, must be at least of {@link #LONG_BUF_SIZE}
|
* @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG}
|
||||||
* length
|
* length
|
||||||
* @return number of chars written to buffer
|
* @return number of chars written to buffer
|
||||||
*/
|
*/
|
||||||
|
@ -122,7 +129,7 @@ public final class NumericUtils {
|
||||||
* @param shift how many bits to strip from the right
|
* @param shift how many bits to strip from the right
|
||||||
*/
|
*/
|
||||||
public static String longToPrefixCoded(final long val, final int shift) {
|
public static String longToPrefixCoded(final long val, final int shift) {
|
||||||
final char[] buffer = new char[LONG_BUF_SIZE];
|
final char[] buffer = new char[BUF_SIZE_LONG];
|
||||||
final int len = longToPrefixCoded(val, shift, buffer);
|
final int len = longToPrefixCoded(val, shift, buffer);
|
||||||
return new String(buffer, 0, len);
|
return new String(buffer, 0, len);
|
||||||
}
|
}
|
||||||
|
@ -142,7 +149,7 @@ public final class NumericUtils {
|
||||||
* This method is used by {@link NumericTokenStream}.
|
* This method is used by {@link NumericTokenStream}.
|
||||||
* @param val the numeric value
|
* @param val the numeric value
|
||||||
* @param shift how many bits to strip from the right
|
* @param shift how many bits to strip from the right
|
||||||
* @param buffer that will contain the encoded chars, must be at least of {@link #INT_BUF_SIZE}
|
* @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT}
|
||||||
* length
|
* length
|
||||||
* @return number of chars written to buffer
|
* @return number of chars written to buffer
|
||||||
*/
|
*/
|
||||||
|
@ -170,7 +177,7 @@ public final class NumericUtils {
|
||||||
* @param shift how many bits to strip from the right
|
* @param shift how many bits to strip from the right
|
||||||
*/
|
*/
|
||||||
public static String intToPrefixCoded(final int val, final int shift) {
|
public static String intToPrefixCoded(final int val, final int shift) {
|
||||||
final char[] buffer = new char[INT_BUF_SIZE];
|
final char[] buffer = new char[BUF_SIZE_INT];
|
||||||
final int len = intToPrefixCoded(val, shift, buffer);
|
final int len = intToPrefixCoded(val, shift, buffer);
|
||||||
return new String(buffer, 0, len);
|
return new String(buffer, 0, len);
|
||||||
}
|
}
|
||||||
|
@ -294,8 +301,6 @@ public final class NumericUtils {
|
||||||
public static void splitLongRange(final LongRangeBuilder builder,
|
public static void splitLongRange(final LongRangeBuilder builder,
|
||||||
final int precisionStep, final long minBound, final long maxBound
|
final int precisionStep, final long minBound, final long maxBound
|
||||||
) {
|
) {
|
||||||
if (precisionStep<1 || precisionStep>64)
|
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
|
||||||
splitRange(builder, 64, precisionStep, minBound, maxBound);
|
splitRange(builder, 64, precisionStep, minBound, maxBound);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,8 +315,6 @@ public final class NumericUtils {
|
||||||
public static void splitIntRange(final IntRangeBuilder builder,
|
public static void splitIntRange(final IntRangeBuilder builder,
|
||||||
final int precisionStep, final int minBound, final int maxBound
|
final int precisionStep, final int minBound, final int maxBound
|
||||||
) {
|
) {
|
||||||
if (precisionStep<1 || precisionStep>32)
|
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
|
||||||
splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
|
splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -320,6 +323,8 @@ public final class NumericUtils {
|
||||||
final Object builder, final int valSize,
|
final Object builder, final int valSize,
|
||||||
final int precisionStep, long minBound, long maxBound
|
final int precisionStep, long minBound, long maxBound
|
||||||
) {
|
) {
|
||||||
|
if (precisionStep < 1)
|
||||||
|
throw new IllegalArgumentException("precisionStep must be >=1");
|
||||||
if (minBound > maxBound) return;
|
if (minBound > maxBound) return;
|
||||||
for (int shift=0; ; shift += precisionStep) {
|
for (int shift=0; ; shift += precisionStep) {
|
||||||
// calculate new bounds for inner precision
|
// calculate new bounds for inner precision
|
||||||
|
|
|
@ -20,61 +20,67 @@ package org.apache.lucene.analysis;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.NumericUtils;
|
import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
|
||||||
public class TestNumericTokenStream extends LuceneTestCase {
|
public class TestNumericTokenStream extends LuceneTestCase {
|
||||||
|
|
||||||
static final int precisionStep = 8;
|
|
||||||
static final long lvalue = 4573245871874382L;
|
static final long lvalue = 4573245871874382L;
|
||||||
static final int ivalue = 123456;
|
static final int ivalue = 123456;
|
||||||
|
|
||||||
public void testLongStreamNewAPI() throws Exception {
|
public void testLongStreamNewAPI() throws Exception {
|
||||||
final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
|
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
|
||||||
stream.setUseNewAPI(true);
|
stream.setUseNewAPI(true);
|
||||||
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
||||||
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
||||||
for (int shift=0; shift<64; shift+=precisionStep) {
|
final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
|
||||||
|
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
|
||||||
assertTrue("New token is available", stream.incrementToken());
|
assertTrue("New token is available", stream.incrementToken());
|
||||||
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term());
|
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term());
|
||||||
|
assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
|
||||||
}
|
}
|
||||||
assertFalse("No more tokens available", stream.incrementToken());
|
assertFalse("No more tokens available", stream.incrementToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testLongStreamOldAPI() throws Exception {
|
public void testLongStreamOldAPI() throws Exception {
|
||||||
final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
|
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
|
||||||
stream.setUseNewAPI(false);
|
stream.setUseNewAPI(false);
|
||||||
Token tok=new Token();
|
Token tok=new Token();
|
||||||
for (int shift=0; shift<64; shift+=precisionStep) {
|
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
|
||||||
assertNotNull("New token is available", tok=stream.next(tok));
|
assertNotNull("New token is available", tok=stream.next(tok));
|
||||||
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
|
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
|
||||||
|
assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
|
||||||
}
|
}
|
||||||
assertNull("No more tokens available", stream.next(tok));
|
assertNull("No more tokens available", stream.next(tok));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIntStreamNewAPI() throws Exception {
|
public void testIntStreamNewAPI() throws Exception {
|
||||||
final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
|
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
|
||||||
stream.setUseNewAPI(true);
|
stream.setUseNewAPI(true);
|
||||||
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
||||||
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
||||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
|
||||||
|
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
|
||||||
assertTrue("New token is available", stream.incrementToken());
|
assertTrue("New token is available", stream.incrementToken());
|
||||||
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term());
|
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term());
|
||||||
|
assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
|
||||||
}
|
}
|
||||||
assertFalse("No more tokens available", stream.incrementToken());
|
assertFalse("No more tokens available", stream.incrementToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIntStreamOldAPI() throws Exception {
|
public void testIntStreamOldAPI() throws Exception {
|
||||||
final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
|
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
|
||||||
stream.setUseNewAPI(false);
|
stream.setUseNewAPI(false);
|
||||||
Token tok=new Token();
|
Token tok=new Token();
|
||||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
|
||||||
assertNotNull("New token is available", tok=stream.next(tok));
|
assertNotNull("New token is available", tok=stream.next(tok));
|
||||||
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
|
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
|
||||||
|
assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
|
||||||
}
|
}
|
||||||
assertNull("No more tokens available", stream.next(tok));
|
assertNull("No more tokens available", stream.next(tok));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNotInitialized() throws Exception {
|
public void testNotInitialized() throws Exception {
|
||||||
final NumericTokenStream stream=new NumericTokenStream(precisionStep);
|
final NumericTokenStream stream=new NumericTokenStream();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
stream.reset();
|
stream.reset();
|
||||||
|
|
|
@ -53,13 +53,14 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
|
||||||
field8 = new NumericField("field8", 8, Field.Store.YES, true),
|
field8 = new NumericField("field8", 8, Field.Store.YES, true),
|
||||||
field4 = new NumericField("field4", 4, Field.Store.YES, true),
|
field4 = new NumericField("field4", 4, Field.Store.YES, true),
|
||||||
field2 = new NumericField("field2", 2, Field.Store.YES, true),
|
field2 = new NumericField("field2", 2, Field.Store.YES, true),
|
||||||
|
fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
|
||||||
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
|
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
|
||||||
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
|
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
|
||||||
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
|
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
// add fields, that have a distance to test general functionality
|
// add fields, that have a distance to test general functionality
|
||||||
doc.add(field8); doc.add(field4); doc.add(field2);
|
doc.add(field8); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
|
||||||
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
||||||
doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
|
doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
|
||||||
|
|
||||||
|
@ -69,6 +70,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
|
||||||
field8.setIntValue(val);
|
field8.setIntValue(val);
|
||||||
field4.setIntValue(val);
|
field4.setIntValue(val);
|
||||||
field2.setIntValue(val);
|
field2.setIntValue(val);
|
||||||
|
fieldNoTrie.setIntValue(val);
|
||||||
|
|
||||||
val=l-(noDocs/2);
|
val=l-(noDocs/2);
|
||||||
ascfield8.setIntValue(val);
|
ascfield8.setIntValue(val);
|
||||||
|
@ -261,9 +263,13 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
|
||||||
termCountT += tq.getTotalNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
termCountC += cq.getTotalNumberOfTerms();
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
}
|
}
|
||||||
System.out.println("Average number of terms during random search on '" + field + "':");
|
if (precisionStep == Integer.MAX_VALUE) {
|
||||||
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
|
||||||
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
} else {
|
||||||
|
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||||
|
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||||
|
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||||
|
@ -278,6 +284,10 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
|
||||||
testRandomTrieAndClassicRangeQuery(2);
|
testRandomTrieAndClassicRangeQuery(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception {
|
||||||
|
testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
private void testRangeSplit(int precisionStep) throws Exception {
|
private void testRangeSplit(int precisionStep) throws Exception {
|
||||||
final Random rnd=newRandom();
|
final Random rnd=newRandom();
|
||||||
String field="ascfield"+precisionStep;
|
String field="ascfield"+precisionStep;
|
||||||
|
|
|
@ -51,27 +51,33 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
|
|
||||||
NumericField
|
NumericField
|
||||||
field8 = new NumericField("field8", 8, Field.Store.YES, true),
|
field8 = new NumericField("field8", 8, Field.Store.YES, true),
|
||||||
|
field6 = new NumericField("field6", 6, Field.Store.YES, true),
|
||||||
field4 = new NumericField("field4", 4, Field.Store.YES, true),
|
field4 = new NumericField("field4", 4, Field.Store.YES, true),
|
||||||
field2 = new NumericField("field2", 2, Field.Store.YES, true),
|
field2 = new NumericField("field2", 2, Field.Store.YES, true),
|
||||||
|
fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
|
||||||
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
|
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
|
||||||
|
ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true),
|
||||||
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
|
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
|
||||||
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
|
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
// add fields, that have a distance to test general functionality
|
// add fields, that have a distance to test general functionality
|
||||||
doc.add(field8); doc.add(field4); doc.add(field2);
|
doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
|
||||||
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
||||||
doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
|
doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2);
|
||||||
|
|
||||||
// Add a series of noDocs docs with increasing long values, by updating the fields
|
// Add a series of noDocs docs with increasing long values, by updating the fields
|
||||||
for (int l=0; l<noDocs; l++) {
|
for (int l=0; l<noDocs; l++) {
|
||||||
long val=distance*l+startOffset;
|
long val=distance*l+startOffset;
|
||||||
field8.setLongValue(val);
|
field8.setLongValue(val);
|
||||||
|
field6.setLongValue(val);
|
||||||
field4.setLongValue(val);
|
field4.setLongValue(val);
|
||||||
field2.setLongValue(val);
|
field2.setLongValue(val);
|
||||||
|
fieldNoTrie.setLongValue(val);
|
||||||
|
|
||||||
val=l-(noDocs/2);
|
val=l-(noDocs/2);
|
||||||
ascfield8.setLongValue(val);
|
ascfield8.setLongValue(val);
|
||||||
|
ascfield6.setLongValue(val);
|
||||||
ascfield4.setLongValue(val);
|
ascfield4.setLongValue(val);
|
||||||
ascfield2.setLongValue(val);
|
ascfield2.setLongValue(val);
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
|
@ -139,6 +145,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testRange(8);
|
testRange(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRange_6bit() throws Exception {
|
||||||
|
testRange(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRange_4bit() throws Exception {
|
public void testRange_4bit() throws Exception {
|
||||||
testRange(4);
|
testRange(4);
|
||||||
}
|
}
|
||||||
|
@ -178,6 +188,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testLeftOpenRange(8);
|
testLeftOpenRange(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLeftOpenRange_6bit() throws Exception {
|
||||||
|
testLeftOpenRange(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testLeftOpenRange_4bit() throws Exception {
|
public void testLeftOpenRange_4bit() throws Exception {
|
||||||
testLeftOpenRange(4);
|
testLeftOpenRange(4);
|
||||||
}
|
}
|
||||||
|
@ -206,6 +220,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testRightOpenRange(8);
|
testRightOpenRange(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRightOpenRange_6bit() throws Exception {
|
||||||
|
testRightOpenRange(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRightOpenRange_4bit() throws Exception {
|
public void testRightOpenRange_4bit() throws Exception {
|
||||||
testRightOpenRange(4);
|
testRightOpenRange(4);
|
||||||
}
|
}
|
||||||
|
@ -261,15 +279,23 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
termCountT += tq.getTotalNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
termCountC += cq.getTotalNumberOfTerms();
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
}
|
}
|
||||||
System.out.println("Average number of terms during random search on '" + field + "':");
|
if (precisionStep == Integer.MAX_VALUE) {
|
||||||
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
|
||||||
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
} else {
|
||||||
|
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||||
|
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||||
|
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||||
testRandomTrieAndClassicRangeQuery(8);
|
testRandomTrieAndClassicRangeQuery(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRandomTrieAndClassicRangeQuery_6bit() throws Exception {
|
||||||
|
testRandomTrieAndClassicRangeQuery(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||||
testRandomTrieAndClassicRangeQuery(4);
|
testRandomTrieAndClassicRangeQuery(4);
|
||||||
}
|
}
|
||||||
|
@ -278,6 +304,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testRandomTrieAndClassicRangeQuery(2);
|
testRandomTrieAndClassicRangeQuery(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception {
|
||||||
|
testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
private void testRangeSplit(int precisionStep) throws Exception {
|
private void testRangeSplit(int precisionStep) throws Exception {
|
||||||
final Random rnd=newRandom();
|
final Random rnd=newRandom();
|
||||||
String field="ascfield"+precisionStep;
|
String field="ascfield"+precisionStep;
|
||||||
|
@ -311,6 +341,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testRangeSplit(8);
|
testRangeSplit(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRangeSplit_6bit() throws Exception {
|
||||||
|
testRangeSplit(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRangeSplit_4bit() throws Exception {
|
public void testRangeSplit_4bit() throws Exception {
|
||||||
testRangeSplit(4);
|
testRangeSplit(4);
|
||||||
}
|
}
|
||||||
|
@ -339,6 +373,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testDoubleRange(8);
|
testDoubleRange(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDoubleRange_6bit() throws Exception {
|
||||||
|
testDoubleRange(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testDoubleRange_4bit() throws Exception {
|
public void testDoubleRange_4bit() throws Exception {
|
||||||
testDoubleRange(4);
|
testDoubleRange(4);
|
||||||
}
|
}
|
||||||
|
@ -376,6 +414,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
|
||||||
testSorting(8);
|
testSorting(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSorting_6bit() throws Exception {
|
||||||
|
testSorting(6);
|
||||||
|
}
|
||||||
|
|
||||||
public void testSorting_4bit() throws Exception {
|
public void testSorting_4bit() throws Exception {
|
||||||
testSorting(4);
|
testSorting(4);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue