diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4ce8943cd7c..bd8b6282128 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -141,6 +141,10 @@ Optimizations that packs the core attributes into one impl, for faster clearAttributes(), saveState(), and restoreState(). (Uwe Schindler, Robert Muir) +* LUCENE-5609: Changed the default NumericField precisionStep from 4 + to 8 (for int/float) and 16 (for long/double), for faster indexing + time and smaller indices. (Robert Muir, Uwe Schindler, Mike McCandless) + Bug fixes * LUCENE-5600: HttpClientBase did not properly consume a connection if a server diff --git a/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java index acb31d12434..9b6b8a60c4d 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -214,7 +214,7 @@ public final class NumericTokenStream extends TokenStream { /** * Creates a token stream for numeric values using the default precisionStep - * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, + * {@link NumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized, * before using set a value using the various set???Value() methods. */ public NumericTokenStream() { diff --git a/lucene/core/src/java/org/apache/lucene/document/DoubleField.java b/lucene/core/src/java/org/apache/lucene/document/DoubleField.java index 4446f460672..1a56c7c5329 100644 --- a/lucene/core/src/java/org/apache/lucene/document/DoubleField.java +++ b/lucene/core/src/java/org/apache/lucene/document/DoubleField.java @@ -145,7 +145,7 @@ public final class DoubleField extends Field { /** Creates a stored or un-stored DoubleField with the provided value * and default precisionStep {@link - * NumericUtils#PRECISION_STEP_DEFAULT} (4). + * NumericUtils#PRECISION_STEP_DEFAULT} (16). 
* @param name field name * @param value 64-bit double value * @param stored Store.YES if the content should also be stored diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatField.java b/lucene/core/src/java/org/apache/lucene/document/FloatField.java index 062d0bd3a8f..5b326cfceae 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FloatField.java +++ b/lucene/core/src/java/org/apache/lucene/document/FloatField.java @@ -125,6 +125,7 @@ public final class FloatField extends Field { TYPE_NOT_STORED.setOmitNorms(true); TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); TYPE_NOT_STORED.setNumericType(FieldType.NumericType.FLOAT); + TYPE_NOT_STORED.setNumericPrecisionStep(NumericUtils.PRECISION_STEP_DEFAULT_32); TYPE_NOT_STORED.freeze(); } @@ -139,13 +140,14 @@ public final class FloatField extends Field { TYPE_STORED.setOmitNorms(true); TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); TYPE_STORED.setNumericType(FieldType.NumericType.FLOAT); + TYPE_STORED.setNumericPrecisionStep(NumericUtils.PRECISION_STEP_DEFAULT_32); TYPE_STORED.setStored(true); TYPE_STORED.freeze(); } /** Creates a stored or un-stored FloatField with the provided value * and default precisionStep {@link - * NumericUtils#PRECISION_STEP_DEFAULT} (4). + * NumericUtils#PRECISION_STEP_DEFAULT_32} (8). 
* @param name field name * @param value 32-bit double value * @param stored Store.YES if the content should also be stored diff --git a/lucene/core/src/java/org/apache/lucene/document/IntField.java b/lucene/core/src/java/org/apache/lucene/document/IntField.java index f48fdd14095..9188f3c6286 100644 --- a/lucene/core/src/java/org/apache/lucene/document/IntField.java +++ b/lucene/core/src/java/org/apache/lucene/document/IntField.java @@ -125,6 +125,7 @@ public final class IntField extends Field { TYPE_NOT_STORED.setOmitNorms(true); TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); TYPE_NOT_STORED.setNumericType(FieldType.NumericType.INT); + TYPE_NOT_STORED.setNumericPrecisionStep(NumericUtils.PRECISION_STEP_DEFAULT_32); TYPE_NOT_STORED.freeze(); } @@ -139,13 +140,14 @@ public final class IntField extends Field { TYPE_STORED.setOmitNorms(true); TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); TYPE_STORED.setNumericType(FieldType.NumericType.INT); + TYPE_STORED.setNumericPrecisionStep(NumericUtils.PRECISION_STEP_DEFAULT_32); TYPE_STORED.setStored(true); TYPE_STORED.freeze(); } /** Creates a stored or un-stored IntField with the provided value * and default precisionStep {@link - * NumericUtils#PRECISION_STEP_DEFAULT} (4). + * NumericUtils#PRECISION_STEP_DEFAULT_32} (8). * @param name field name * @param value 32-bit integer value * @param stored Store.YES if the content should also be stored diff --git a/lucene/core/src/java/org/apache/lucene/document/LongField.java b/lucene/core/src/java/org/apache/lucene/document/LongField.java index 14d92e4ecd8..6d9dc3c0509 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LongField.java +++ b/lucene/core/src/java/org/apache/lucene/document/LongField.java @@ -155,7 +155,7 @@ public final class LongField extends Field { /** Creates a stored or un-stored LongField with the provided value * and default precisionStep {@link - * NumericUtils#PRECISION_STEP_DEFAULT} (4). + * NumericUtils#PRECISION_STEP_DEFAULT} (16). 
* @param name field name * @param value 64-bit long value * @param stored Store.YES if the content should also be stored diff --git a/lucene/core/src/java/org/apache/lucene/search/NumericRangeFilter.java b/lucene/core/src/java/org/apache/lucene/search/NumericRangeFilter.java index 96fe8a4ede0..05453ad8154 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NumericRangeFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/NumericRangeFilter.java @@ -68,7 +68,7 @@ public final class NumericRangeFilter extends MultiTermQueryWr /** * Factory that creates a NumericRangeFilter, that queries a long - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (16). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -98,7 +98,7 @@ public final class NumericRangeFilter extends MultiTermQueryWr /** * Factory that creates a NumericRangeFilter, that queries a int - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT_32} (8). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -130,7 +130,7 @@ public final class NumericRangeFilter extends MultiTermQueryWr /** * Factory that creates a NumericRangeFilter, that queries a double - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (16). 
* You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. * {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query @@ -164,7 +164,7 @@ public final class NumericRangeFilter extends MultiTermQueryWr /** * Factory that creates a NumericRangeFilter, that queries a float - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT_32} (8). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. * {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query diff --git a/lucene/core/src/java/org/apache/lucene/search/NumericRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/NumericRangeQuery.java index 102523e0b5a..36784b08121 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NumericRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NumericRangeQuery.java @@ -196,7 +196,7 @@ public final class NumericRangeQuery extends MultiTermQuery { /** * Factory that creates a NumericRangeQuery, that queries a long - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (16). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -222,7 +222,7 @@ public final class NumericRangeQuery extends MultiTermQuery { /** * Factory that creates a NumericRangeQuery, that queries a int - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT_32} (8). 
* You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -230,7 +230,7 @@ public final class NumericRangeQuery extends MultiTermQuery { public static NumericRangeQuery newIntRange(final String field, Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery<>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.INT, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery<>(field, NumericUtils.PRECISION_STEP_DEFAULT_32, NumericType.INT, min, max, minInclusive, maxInclusive); } /** @@ -250,7 +250,7 @@ public final class NumericRangeQuery extends MultiTermQuery { /** * Factory that creates a NumericRangeQuery, that queries a double - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (16). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. * {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query @@ -280,7 +280,7 @@ public final class NumericRangeQuery extends MultiTermQuery { /** * Factory that creates a NumericRangeQuery, that queries a float - * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT_32} (8). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. 
* {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query @@ -290,7 +290,7 @@ public final class NumericRangeQuery extends MultiTermQuery { public static NumericRangeQuery newFloatRange(final String field, Float min, Float max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery<>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.FLOAT, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery<>(field, NumericUtils.PRECISION_STEP_DEFAULT_32, NumericType.FLOAT, min, max, minInclusive, maxInclusive); } @Override @SuppressWarnings("unchecked") diff --git a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java index 98bb667d441..e92627e6500 100644 --- a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java @@ -72,12 +72,17 @@ public final class NumericUtils { private NumericUtils() {} // no instance! /** - * The default precision step used by {@link IntField}, - * {@link FloatField}, {@link LongField}, {@link - * DoubleField}, {@link NumericTokenStream}, {@link + * The default precision step used by {@link LongField}, + * {@link DoubleField}, {@link NumericTokenStream}, {@link * NumericRangeQuery}, and {@link NumericRangeFilter}. */ - public static final int PRECISION_STEP_DEFAULT = 4; + public static final int PRECISION_STEP_DEFAULT = 16; + + /** + * The default precision step used by {@link IntField}, {@link FloatField}, + * and the int/float range factories of {@link NumericRangeQuery} and {@link NumericRangeFilter}. + */ + public static final int PRECISION_STEP_DEFAULT_32 = 8; /** * Longs are stored at lower precision by shifting off lower bits. 
The shift count is diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java index b0a7b0f47fb..94de4c62c03 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java @@ -75,7 +75,7 @@ public class TestFieldReuse extends BaseTokenStreamTestCase { // passing null TokenStream ts = intField.tokenStream(null, null); assertTrue(ts instanceof NumericTokenStream); - assertEquals(NumericUtils.PRECISION_STEP_DEFAULT, ((NumericTokenStream)ts).getPrecisionStep()); + assertEquals(NumericUtils.PRECISION_STEP_DEFAULT_32, ((NumericTokenStream)ts).getPrecisionStep()); assertNumericContents(5, ts); // now reuse previous stream