From b2a4bc68adf2101970a83c8489fc5d936c6e9116 Mon Sep 17 00:00:00 2001
From: Uwe Schindler Values indexed by this stream can be loaded into the {@link FieldCache}
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
- * type; {@link SortField#AUTO} does not work with this type of field)
+ * type; {@link SortField#AUTO} does not work with this type of field).
+ * Values solely used for sorting can be indexed using a NOTE: This API is experimental and
* might change in incompatible ways in the next release.
@@ -86,24 +89,30 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
*/
public final class NumericTokenStream extends TokenStream {
- /** The full precision 64 bit token gets this token type assigned. */
- public static final String TOKEN_TYPE_FULL_PREC_64 = "fullPrecNumeric64";
+ /** The full precision token gets this token type assigned. */
+ public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
- /** The lower precision 64 bit tokens gets this token type assigned. */
- public static final String TOKEN_TYPE_LOWER_PREC_64 = "lowerPrecNumeric64";
-
- /** The full precision 32 bit token gets this token type assigned. */
- public static final String TOKEN_TYPE_FULL_PREC_32 = "fullPrecNumeric32";
-
- /** The lower precision 32 bit tokens gets this token type assigned. */
- public static final String TOKEN_TYPE_LOWER_PREC_32 = "lowerPrecNumeric32";
+ /** The lower precision tokens gets this token type assigned. */
+ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
/**
- * Creates a token stream for numeric values. The stream is not yet initialized,
+ * Creates a token stream for numeric values using the default Values indexed by this field can be loaded into the {@link FieldCache}
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
- * type; {@link SortField#AUTO} does not work with this type of field)
+ * type; {@link SortField#AUTO} does not work with this type of field).
+ * Values solely used for sorting can be indexed using a NOTE: This API is experimental and
* might change in incompatible ways in the next release.
@@ -84,7 +88,34 @@ public final class NumericField extends AbstractField {
private final NumericTokenStream tokenStream;
/**
- * Creates a field for numeric values. The instance is not yet initialized with
+ * Creates a field for numeric values using the default This class has no constructor, you can create queries depending on the data type
@@ -51,6 +52,8 @@ import org.apache.lucene.index.Term;
* new Float(0.3f), new Float(0.10f),
* true, true);
*
+ * The used precisionStep
+ * of {@link Integer#MAX_VALUE} (at least ≥64), because this step only produces
+ * one value token with highest precision.
*
* precisionStep
+ * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
+ * before using set a value using the various set???Value() methods.
+ */
+ public NumericTokenStream() {
+ this(NumericUtils.PRECISION_STEP_DEFAULT);
+ }
+
+ /**
+ * Creates a token stream for numeric values with the specified
+ * <code>precisionStep</code>. The stream is not yet initialized,
* before using set a value using the various set???Value() methods.
*/
public NumericTokenStream(final int precisionStep) {
this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new IllegalArgumentException("precisionStep must be >=1");
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
@@ -165,8 +174,6 @@ public final class NumericTokenStream extends TokenStream {
public void reset() {
if (valSize == 0)
throw new IllegalStateException("call set???Value() before usage");
- if (precisionStep < 1 || precisionStep > valSize)
- throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
shift = 0;
}
@@ -180,15 +187,13 @@ public final class NumericTokenStream extends TokenStream {
final char[] buffer;
switch (valSize) {
case 64:
- buffer = termAtt.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+ buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
- typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
break;
case 32:
- buffer = termAtt.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+ buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
- typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
break;
default:
@@ -196,6 +201,7 @@ public final class NumericTokenStream extends TokenStream {
throw new IllegalArgumentException("valSize must be 32 or 64");
}
+ typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
shift += precisionStep;
return true;
@@ -215,15 +221,13 @@ public final class NumericTokenStream extends TokenStream {
final char[] buffer;
switch (valSize) {
case 64:
- buffer = reusableToken.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE);
+ buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
- reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64);
break;
case 32:
- buffer = reusableToken.resizeTermBuffer(NumericUtils.INT_BUF_SIZE);
+ buffer = reusableToken.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
- reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32);
break;
default:
@@ -231,6 +235,7 @@ public final class NumericTokenStream extends TokenStream {
throw new IllegalArgumentException("valSize must be 32 or 64");
}
+ reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
reusableToken.setPositionIncrement((shift == 0) ? 1 : 0);
shift += precisionStep;
return reusableToken;
diff --git a/src/java/org/apache/lucene/document/NumericField.java b/src/java/org/apache/lucene/document/NumericField.java
index 1ae36c0c8a8..f22a1ba1eef 100644
--- a/src/java/org/apache/lucene/document/NumericField.java
+++ b/src/java/org/apache/lucene/document/NumericField.java
@@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.SortField; // javadocs
@@ -72,7 +73,10 @@ import org.apache.lucene.search.FieldCache; // javadocs
*
* precisionStep
+ * of {@link Integer#MAX_VALUE} (at least ≥64), because this step only produces
+ * one value token with highest precision.
*
* precisionStep
+ * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+ * a numeric value, before indexing a document containing this field,
+ * set a value using the various set???Value() methods.
+ * This constructor creates an indexed, but not stored field.
+ * @param name the field name
+ */
+ public NumericField(String name) {
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
+ }
+
+ /**
+ * Creates a field for numeric values using the default <code>precisionStep</code>
+ * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+ * a numeric value, before indexing a document containing this field,
+ * set a value using the various set???Value() methods.
+ * @param name the field name
+ * @param store if the field should be stored in plain text form
+ * (according to <code>toString(value)</code> of the used data type)
+ * @param index if the field should be indexed using {@link NumericTokenStream}
+ */
+ public NumericField(String name, Field.Store store, boolean index) {
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
+ }
+
+ /**
+ * Creates a field for numeric values with the specified
+ * <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set???Value() methods.
* This constructor creates an indexed, but not stored field.
@@ -96,7 +127,8 @@ public final class NumericField extends AbstractField {
}
/**
- * Creates a field for numeric values. The instance is not yet initialized with
+ * Creates a field for numeric values with the specified
+ * <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set???Value() methods.
* @param name the field name
diff --git a/src/java/org/apache/lucene/search/NumericRangeFilter.java b/src/java/org/apache/lucene/search/NumericRangeFilter.java
index 09331a1f62c..f17e70a5ef3 100644
--- a/src/java/org/apache/lucene/search/NumericRangeFilter.java
+++ b/src/java/org/apache/lucene/search/NumericRangeFilter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
import org.apache.lucene.document.NumericField; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
/**
* Implementation of a {@link Filter} that implements trie-based range filtering
@@ -64,6 +65,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
);
}
+ /**
+ * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>long</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeFilter newLongRange(final String field,
+ Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeFilter(
+ NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive)
+ );
+ }
+
/**
* Factory that creates a <code>NumericRangeFilter</code>, that filters an <code>int</code>
* range using the given <code>precisionStep</code>.
@@ -79,6 +95,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
);
}
+ /**
+ * Factory that creates a <code>NumericRangeFilter</code>, that queries an <code>int</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeFilter newIntRange(final String field,
+ Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeFilter(
+ NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive)
+ );
+ }
+
/**
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>double</code>
* range using the given <code>precisionStep</code>.
@@ -94,6 +125,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
);
}
+ /**
+ * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>double</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeFilter newDoubleRange(final String field,
+ Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeFilter(
+ NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive)
+ );
+ }
+
/**
* Factory that creates a <code>NumericRangeFilter</code>, that filters a <code>float</code>
* range using the given <code>precisionStep</code>.
@@ -109,6 +155,21 @@ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter {
);
}
+ /**
+ * Factory that creates a <code>NumericRangeFilter</code>, that queries a <code>float</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeFilter newFloatRange(final String field,
+ Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeFilter(
+ NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive)
+ );
+ }
+
/** Returns the field name for this filter */
public String getField() { return ((NumericRangeQuery)query).getField(); }
diff --git a/src/java/org/apache/lucene/search/NumericRangeQuery.java b/src/java/org/apache/lucene/search/NumericRangeQuery.java
index dfdc32f1572..6c680e29a4f 100644
--- a/src/java/org/apache/lucene/search/NumericRangeQuery.java
+++ b/src/java/org/apache/lucene/search/NumericRangeQuery.java
@@ -38,8 +38,9 @@ import org.apache.lucene.index.Term;
* An important setting is the <code>precisionStep</code>, which specifies,
* how many different precisions per numeric value are indexed to speed up range queries.
* Lower values create more terms but speed up search, higher values create less terms, but
- * slow down search. Suitable values are 2, 4, or 8. A good starting point to test is 4.
- * For code examples see {@link NumericField}.
+ * slow down search. Suitable values are between 1 and 8. A good starting point to test is 4,
+ * which is the default value for all <code>Numeric*</code> classes. For a discussion about ideal
+ * values, see below. Indexing code examples can be found in {@link NumericField}.
*
* Searching
* precisionStep
must be compatible
+ * to the one used during indexing (see below). The default is also 4.
*
* How it works
*
@@ -101,18 +104,31 @@ import org.apache.lucene.index.Term;
* be found out by testing. Important: You can index with a lower precision step value and test search speed
* using a multiple of the original step value.
Good values for precisionStep
are depending on usage and data type:
+ *
precisionStep
is given.
+ * precisionStep
). Using {@link NumericField NumericFields} for sorting
+ * is ideal, because building the field cache is much faster than with text-only numbers.
+ * Sorting is also possible with range query optimized fields using one of the above precisionSteps
.
+ * This dramatically improves the performance of Apache Lucene with range queries, which * are no longer dependent on the index size and the number of distinct values because there is * an upper limit unrelated to either of these properties.
*
* <p>Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed
- * that the old {@link RangeQuery} (with raised {@link BooleanQuery} clause count) took about 30-40
- * secs to complete, {@link ConstantScoreRangeQuery} took 5 secs and executing
- * this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
- * This query type was developed for a geographic portal, where the performance for
+ * that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
+ * took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs
+ * and executing this class took &lt;100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
+ * precision step). This query type was developed for a geographic portal, where the performance for
* e.g. bounding boxes or exact date/time stamps is important.</p>
*
- * <p>The query is in {@linkplain #setConstantScoreRewrite constant score mode} per default.
+ * <p>The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}.
* With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery}
* rewrite mode without changing the max clause count.
*
@@ -127,8 +143,8 @@ public final class NumericRangeQuery extends MultiTermQuery {
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
) {
assert (valSize == 32 || valSize == 64);
- if (precisionStep < 1 || precisionStep > valSize)
- throw new IllegalArgumentException("precisionStep may only be 1.."+valSize);
+ if (precisionStep < 1)
+ throw new IllegalArgumentException("precisionStep must be >=1");
this.field = field.intern();
this.precisionStep = precisionStep;
this.valSize = valSize;
@@ -152,6 +168,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
}
+ /**
+ * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>long</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeQuery newLongRange(final String field,
+ Long min, Long max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+ }
+
/**
* Factory that creates a <code>NumericRangeQuery</code>, that queries an <code>int</code>
* range using the given <code>precisionStep</code>.
@@ -165,6 +194,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
}
+ /**
+ * Factory that creates a <code>NumericRangeQuery</code>, that queries an <code>int</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeQuery newIntRange(final String field,
+ Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+ }
+
/**
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
* range using the given <code>precisionStep</code>.
@@ -178,6 +220,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive);
}
+ /**
+ * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>double</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeQuery newDoubleRange(final String field,
+ Double min, Double max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive);
+ }
+
/**
* Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
* range using the given <code>precisionStep</code>.
@@ -191,6 +246,19 @@ public final class NumericRangeQuery extends MultiTermQuery {
return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive);
}
+ /**
+ * Factory that creates a <code>NumericRangeQuery</code>, that queries a <code>float</code>
+ * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
+ * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
+ * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ */
+ public static NumericRangeQuery newFloatRange(final String field,
+ Float min, Float max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
+ }
+
//@Override
protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
return new NumericRangeTermEnum(reader);
diff --git a/src/java/org/apache/lucene/util/NumericUtils.java b/src/java/org/apache/lucene/util/NumericUtils.java
index ea4252a8b96..1a3c635632a 100644
--- a/src/java/org/apache/lucene/util/NumericUtils.java
+++ b/src/java/org/apache/lucene/util/NumericUtils.java
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
@@ -62,9 +63,15 @@ import org.apache.lucene.search.NumericRangeFilter; // for javadocs
public final class NumericUtils {
private NumericUtils() {} // no instance!
-
+
/**
- * Longs are stored at lower precision by shifting off lower bits. The shift count is
+ * The default precision step used by {@link NumericField}, {@link NumericTokenStream},
+ * {@link NumericRangeQuery}, and {@link NumericRangeFilter} as default
+ */
+ public static final int PRECISION_STEP_DEFAULT = 4;
+
+ /**
+ * Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is
* stored as <code>SHIFT_START_LONG+shift</code> in the first character
*/
public static final char SHIFT_START_LONG = (char)0x20;
@@ -74,10 +81,10 @@ public final class NumericUtils {
* for encoding <code>long</code> values.
* @see #longToPrefixCoded(long,int,char[])
*/
- public static final int LONG_BUF_SIZE = 63/7 + 2;
+ public static final int BUF_SIZE_LONG = 63/7 + 2;
/**
- * Integers are stored at lower precision by shifting off lower bits. The shift count is
+ * Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is
* stored as <code>SHIFT_START_INT+shift</code> in the first character
*/
public static final char SHIFT_START_INT = (char)0x60;
@@ -87,14 +94,14 @@ public final class NumericUtils {
* for encoding <code>int</code> values.
* @see #intToPrefixCoded(int,int,char[])
*/
- public static final int INT_BUF_SIZE = 31/7 + 2;
+ public static final int BUF_SIZE_INT = 31/7 + 2;
/**
* Expert: Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
* This method is used by {@link NumericTokenStream}.
* @param val the numeric value
* @param shift how many bits to strip from the right
- * @param buffer that will contain the encoded chars, must be at least of {@link #LONG_BUF_SIZE}
+ * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG}
* length
* @return number of chars written to buffer
*/
@@ -122,7 +129,7 @@ public final class NumericUtils {
* @param shift how many bits to strip from the right
*/
public static String longToPrefixCoded(final long val, final int shift) {
- final char[] buffer = new char[LONG_BUF_SIZE];
+ final char[] buffer = new char[BUF_SIZE_LONG];
final int len = longToPrefixCoded(val, shift, buffer);
return new String(buffer, 0, len);
}
@@ -142,7 +149,7 @@ public final class NumericUtils {
* This method is used by {@link NumericTokenStream}.
* @param val the numeric value
* @param shift how many bits to strip from the right
- * @param buffer that will contain the encoded chars, must be at least of {@link #INT_BUF_SIZE}
+ * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT}
* length
* @return number of chars written to buffer
*/
@@ -170,7 +177,7 @@ public final class NumericUtils {
* @param shift how many bits to strip from the right
*/
public static String intToPrefixCoded(final int val, final int shift) {
- final char[] buffer = new char[INT_BUF_SIZE];
+ final char[] buffer = new char[BUF_SIZE_INT];
final int len = intToPrefixCoded(val, shift, buffer);
return new String(buffer, 0, len);
}
@@ -294,8 +301,6 @@ public final class NumericUtils {
public static void splitLongRange(final LongRangeBuilder builder,
final int precisionStep, final long minBound, final long maxBound
) {
- if (precisionStep<1 || precisionStep>64)
- throw new IllegalArgumentException("precisionStep may only be 1..64");
splitRange(builder, 64, precisionStep, minBound, maxBound);
}
@@ -310,8 +315,6 @@ public final class NumericUtils {
public static void splitIntRange(final IntRangeBuilder builder,
final int precisionStep, final int minBound, final int maxBound
) {
- if (precisionStep<1 || precisionStep>32)
- throw new IllegalArgumentException("precisionStep may only be 1..32");
splitRange(builder, 32, precisionStep, (long)minBound, (long)maxBound);
}
@@ -320,6 +323,8 @@ public final class NumericUtils {
final Object builder, final int valSize,
final int precisionStep, long minBound, long maxBound
) {
+ if (precisionStep < 1)
+ throw new IllegalArgumentException("precisionStep must be >=1");
if (minBound > maxBound) return;
for (int shift=0; ; shift += precisionStep) {
// calculate new bounds for inner precision
diff --git a/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java b/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
index 5f225895a13..e9f73019512 100644
--- a/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
+++ b/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
@@ -20,61 +20,67 @@ package org.apache.lucene.analysis;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
public class TestNumericTokenStream extends LuceneTestCase {
- static final int precisionStep = 8;
static final long lvalue = 4573245871874382L;
static final int ivalue = 123456;
public void testLongStreamNewAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+ final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
- for (int shift=0; shift<64; shift+=precisionStep) {
+ final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
+ for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term());
+ assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("No more tokens available", stream.incrementToken());
}
public void testLongStreamOldAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue);
+ final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
stream.setUseNewAPI(false);
Token tok=new Token();
- for (int shift=0; shift<64; shift+=precisionStep) {
+ for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertNotNull("New token is available", tok=stream.next(tok));
assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term());
+ assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
}
assertNull("No more tokens available", stream.next(tok));
}
public void testIntStreamNewAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+ final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
- for (int shift=0; shift<32; shift+=precisionStep) {
+ final TypeAttribute typeAtt = (TypeAttribute) stream.getAttribute(TypeAttribute.class);
+ for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term());
+ assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("No more tokens available", stream.incrementToken());
}
public void testIntStreamOldAPI() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue);
+ final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
stream.setUseNewAPI(false);
Token tok=new Token();
- for (int shift=0; shift<32; shift+=precisionStep) {
+ for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertNotNull("New token is available", tok=stream.next(tok));
assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term());
+ assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, tok.type());
}
assertNull("No more tokens available", stream.next(tok));
}
public void testNotInitialized() throws Exception {
- final NumericTokenStream stream=new NumericTokenStream(precisionStep);
+ final NumericTokenStream stream=new NumericTokenStream();
try {
stream.reset();
diff --git a/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
index 5c928dc3b30..30316858195 100644
--- a/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
+++ b/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
@@ -53,13 +53,14 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
field8 = new NumericField("field8", 8, Field.Store.YES, true),
field4 = new NumericField("field4", 4, Field.Store.YES, true),
field2 = new NumericField("field2", 2, Field.Store.YES, true),
+ fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
Document doc = new Document();
// add fields, that have a distance to test general functionality
- doc.add(field8); doc.add(field4); doc.add(field2);
+ doc.add(field8); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
@@ -69,6 +70,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
field8.setIntValue(val);
field4.setIntValue(val);
field2.setIntValue(val);
+ fieldNoTrie.setIntValue(val);
val=l-(noDocs/2);
ascfield8.setIntValue(val);
@@ -261,9 +263,13 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
}
- System.out.println("Average number of terms during random search on '" + field + "':");
- System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
- System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+ if (precisionStep == Integer.MAX_VALUE) {
+ assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
+ } else {
+ System.out.println("Average number of terms during random search on '" + field + "':");
+ System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
+ System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
+ }
}
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
@@ -278,6 +284,10 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
testRandomTrieAndClassicRangeQuery(2);
}
+ public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception {
+ testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE);
+ }
+
private void testRangeSplit(int precisionStep) throws Exception {
final Random rnd=newRandom();
String field="ascfield"+precisionStep;
diff --git a/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
index bec0a35ac1b..9993827e5be 100644
--- a/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
+++ b/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
@@ -51,27 +51,33 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
NumericField
field8 = new NumericField("field8", 8, Field.Store.YES, true),
+ field6 = new NumericField("field6", 6, Field.Store.YES, true),
field4 = new NumericField("field4", 4, Field.Store.YES, true),
field2 = new NumericField("field2", 2, Field.Store.YES, true),
+ fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
+ ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true),
ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
Document doc = new Document();
// add fields, that have a distance to test general functionality
- doc.add(field8); doc.add(field4); doc.add(field2);
+ doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
- doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2);
+ doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2);
// Add a series of noDocs docs with increasing long values, by updating the fields
for (int l=0; l