From d7ee7c661529b2a8c81c3cc52d581b4a5f19b5b8 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 7 Mar 2016 16:12:15 -0500 Subject: [PATCH] LUCENE-7073: fix FieldType issues with Points --- .../benchmark/byTask/feeds/DocMaker.java | 44 ++++++++----------- .../byTask/tasks/ReadTokensTask.java | 13 ++---- .../lucene/codecs/lucene60/package-info.java | 7 ++- .../org/apache/lucene/document/FieldType.java | 24 +++++++--- .../document/SortedNumericDocValuesField.java | 4 +- .../apache/lucene/document/TestFieldType.java | 15 +++++-- .../apache/lucene/index/TestPointValues.java | 3 +- .../lucene/facet/range/DoubleRange.java | 3 +- 8 files changed, 59 insertions(+), 54 deletions(-) diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index f2c863cf798..4afafc321c7 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -35,13 +35,12 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType.LegacyNumericType; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.LegacyIntField; -import org.apache.lucene.document.LegacyDoubleField; -import org.apache.lucene.document.LegacyLongField; -import org.apache.lucene.document.LegacyFloatField; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; @@ -119,8 +118,8 @@ public class DocMaker implements Closeable { fields.put(ID_FIELD, new StringField(ID_FIELD, "", 
Field.Store.YES)); fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft)); - numericFields.put(DATE_MSEC_FIELD, new LegacyLongField(DATE_MSEC_FIELD, 0L, Field.Store.NO)); - numericFields.put(TIME_SEC_FIELD, new LegacyIntField(TIME_SEC_FIELD, 0, Field.Store.NO)); + numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L)); + numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0)); doc = new Document(); } else { @@ -148,7 +147,7 @@ public class DocMaker implements Closeable { return f; } - Field getNumericField(String name, LegacyNumericType type) { + Field getNumericField(String name, Class numericType) { Field f; if (reuseFields) { f = numericFields.get(name); @@ -157,21 +156,16 @@ public class DocMaker implements Closeable { } if (f == null) { - switch(type) { - case INT: - f = new LegacyIntField(name, 0, Field.Store.NO); - break; - case LONG: - f = new LegacyLongField(name, 0L, Field.Store.NO); - break; - case FLOAT: - f = new LegacyFloatField(name, 0.0F, Field.Store.NO); - break; - case DOUBLE: - f = new LegacyDoubleField(name, 0.0, Field.Store.NO); - break; - default: - throw new AssertionError("Cannot get here"); + if (numericType.equals(Integer.class)) { + f = new IntPoint(name, 0); + } else if (numericType.equals(Long.class)) { + f = new LongPoint(name, 0L); + } else if (numericType.equals(Float.class)) { + f = new FloatPoint(name, 0.0F); + } else if (numericType.equals(Double.class)) { + f = new DoublePoint(name, 0.0); + } else { + throw new UnsupportedOperationException("Unsupported numeric type: " + numericType); } if (reuseFields) { numericFields.put(name, f); @@ -278,14 +272,14 @@ public class DocMaker implements Closeable { date = new Date(); } - Field dateField = ds.getNumericField(DATE_MSEC_FIELD, FieldType.LegacyNumericType.LONG); + Field dateField = ds.getNumericField(DATE_MSEC_FIELD, Long.class); dateField.setLongValue(date.getTime()); doc.add(dateField); util.cal.setTime(date); final int sec = 
util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND); - Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, LegacyNumericType.INT); + Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, Integer.class); timeSecField.setIntValue(sec); doc.add(timeSecField); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java index 4950d418e71..2e44b99fe3e 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java @@ -26,11 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.LegacyDoubleField; -import org.apache.lucene.document.LegacyFloatField; -import org.apache.lucene.document.LegacyIntField; -import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; /** @@ -73,11 +69,8 @@ public class ReadTokensTask extends PerfTask { Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; for(final IndexableField field : fields) { - if (!field.fieldType().tokenized() || - field instanceof LegacyIntField || - field instanceof LegacyLongField || - field instanceof LegacyFloatField || - field instanceof LegacyDoubleField) { + if (field.fieldType().indexOptions() == IndexOptions.NONE || + field.fieldType().tokenized() == false) { continue; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java index 
64531f5c34f..03a17ba2e38 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java @@ -194,9 +194,9 @@ * *
  • * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}. - * Optional pair of files, recording dimesionally indexed fields, to enable fast + * Optional pair of files, recording dimensionally indexed fields, to enable fast * numeric range filtering and large numeric values like BigInteger and BigDecimal (1D) - * and geo shape intersection (2D, 3D). + * and geographic shape intersection (2D, 3D). *
  • * *

    Details on each of these are provided in their linked pages.

    @@ -396,6 +396,9 @@ * contain the zlib-crc32 checksum of the file. *
  • In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) * that is suitable for faceting/sorting/analytics. + *
  • In version 5.4, DocValues have been improved to store more information on disk: + * addresses for binary fields and ord indexes for multi-valued fields. + *
  • In version 6.0, Points were added, for multi-dimensional range/distance search. *
  • * * diff --git a/lucene/core/src/java/org/apache/lucene/document/FieldType.java b/lucene/core/src/java/org/apache/lucene/document/FieldType.java index 1dfa8792a75..ae84016a327 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FieldType.java +++ b/lucene/core/src/java/org/apache/lucene/document/FieldType.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer; // javadocs import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.PointValues; import org.apache.lucene.util.LegacyNumericUtils; /** @@ -75,8 +76,8 @@ public class FieldType implements IndexableFieldType { this.numericType = ref.numericType(); this.numericPrecisionStep = ref.numericPrecisionStep(); this.docValuesType = ref.docValuesType(); - this.dimensionCount = dimensionCount; - this.dimensionNumBytes = dimensionNumBytes; + this.dimensionCount = ref.dimensionCount; + this.dimensionNumBytes = ref.dimensionNumBytes; // Do not copy frozen! 
} @@ -365,18 +366,24 @@ public class FieldType implements IndexableFieldType { */ public void setDimensions(int dimensionCount, int dimensionNumBytes) { if (dimensionCount < 0) { - throw new IllegalArgumentException("pointDimensionCount must be >= 0; got " + dimensionCount); + throw new IllegalArgumentException("dimensionCount must be >= 0; got " + dimensionCount); + } + if (dimensionCount > PointValues.MAX_DIMENSIONS) { + throw new IllegalArgumentException("dimensionCount must be <= " + PointValues.MAX_DIMENSIONS + "; got " + dimensionCount); } if (dimensionNumBytes < 0) { - throw new IllegalArgumentException("pointNumBytes must be >= 0; got " + dimensionNumBytes); + throw new IllegalArgumentException("dimensionNumBytes must be >= 0; got " + dimensionNumBytes); + } + if (dimensionNumBytes > PointValues.MAX_NUM_BYTES) { + throw new IllegalArgumentException("dimensionNumBytes must be <= " + PointValues.MAX_NUM_BYTES + "; got " + dimensionNumBytes); } if (dimensionCount == 0) { if (dimensionNumBytes != 0) { - throw new IllegalArgumentException("when pointDimensionCount is 0 pointNumBytes must 0; got " + dimensionNumBytes); + throw new IllegalArgumentException("when dimensionCount is 0, dimensionNumBytes must be 0; got " + dimensionNumBytes); } } else if (dimensionNumBytes == 0) { if (dimensionCount != 0) { - throw new IllegalArgumentException("when pointNumBytes is 0 pointDimensionCount must 0; got " + dimensionCount); + throw new IllegalArgumentException("when dimensionNumBytes is 0, dimensionCount must be 0; got " + dimensionCount); } } @@ -484,6 +491,8 @@ public class FieldType implements IndexableFieldType { public int hashCode() { final int prime = 31; int result = 1; + result = prime * result + dimensionCount; + result = prime * result + dimensionNumBytes; result = prime * result + ((docValuesType == null) ? 
0 : docValuesType.hashCode()); result = prime * result + indexOptions.hashCode(); result = prime * result + numericPrecisionStep; @@ -504,6 +513,8 @@ public class FieldType implements IndexableFieldType { if (obj == null) return false; if (getClass() != obj.getClass()) return false; FieldType other = (FieldType) obj; + if (dimensionCount != other.dimensionCount) return false; + if (dimensionNumBytes != other.dimensionNumBytes) return false; if (docValuesType != other.docValuesType) return false; if (indexOptions != other.indexOptions) return false; if (numericPrecisionStep != other.numericPrecisionStep) return false; @@ -517,5 +528,4 @@ public class FieldType implements IndexableFieldType { if (tokenized != other.tokenized) return false; return true; } - } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesField.java index 40ceb2595c1..cbba218f7de 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesField.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesField.java @@ -31,10 +31,10 @@ import org.apache.lucene.index.DocValuesType; * *

    * Note that if you want to encode doubles or floats with proper sort order, - * you will need to encode them with {@link org.apache.lucene.util.LegacyNumericUtils}: + * you will need to encode them with {@link org.apache.lucene.util.NumericUtils}: * *

    - *   document.add(new SortedNumericDocValuesField(name, LegacyNumericUtils.floatToSortableInt(-5.3f)));
    + *   document.add(new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(-5.3f)));
      * 
    * *

    diff --git a/lucene/core/src/test/org/apache/lucene/document/TestFieldType.java b/lucene/core/src/test/org/apache/lucene/document/TestFieldType.java index 65f32d843e6..c49d4e013be 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestFieldType.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestFieldType.java @@ -23,6 +23,7 @@ import java.lang.reflect.Modifier; import org.apache.lucene.document.FieldType.LegacyNumericType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.PointValues; import org.apache.lucene.util.LuceneTestCase; import com.carrotsearch.randomizedtesting.generators.RandomPicks; @@ -70,6 +71,10 @@ public class TestFieldType extends LuceneTestCase { FieldType ft10 = new FieldType(); ft10.setStoreTermVectors(true); assertFalse(ft10.equals(ft)); + + FieldType ft11 = new FieldType(); + ft11.setDimensions(1, 4); + assertFalse(ft11.equals(ft)); } public void testPointsToString() { @@ -90,14 +95,16 @@ public class TestFieldType extends LuceneTestCase { } private static FieldType randomFieldType() throws Exception { + // setDimensions handled special as values must be in-bounds. 
+ Method setDimensionsMethod = FieldType.class.getMethod("setDimensions", int.class, int.class); FieldType ft = new FieldType(); for (Method method : FieldType.class.getMethods()) { - if ((method.getModifiers() & Modifier.PUBLIC) != 0 && method.getName().startsWith("set")) { + if (method.getName().startsWith("set")) { final Class[] parameterTypes = method.getParameterTypes(); final Object[] args = new Object[parameterTypes.length]; - if (method.getName().equals("setPointDimensions")) { - args[0] = 1 + random().nextInt(15); - args[1] = 1 + random().nextInt(100); + if (method.equals(setDimensionsMethod)) { + args[0] = 1 + random().nextInt(PointValues.MAX_DIMENSIONS); + args[1] = 1 + random().nextInt(PointValues.MAX_NUM_BYTES); } else { for (int i = 0; i < args.length; ++i) { args[i] = randomValue(parameterTypes[i]); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java index 9faa0bcfcce..7231b1afc6f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java @@ -385,9 +385,8 @@ public class TestPointValues extends LuceneTestCase { for(int i=0;i { - w.addDocument(doc); + doc.add(new BinaryPoint("dim", values)); }); Document doc2 = new Document(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java index 7585708dde0..6f005ed4a1a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java @@ -32,7 +32,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.LegacyNumericUtils; import org.apache.lucene.util.NumericUtils; /** Represents a range over 
double values. @@ -50,7 +49,7 @@ public final class DoubleRange extends Range { super(label); // TODO: if DoubleDocValuesField used - // LegacyNumericUtils.doubleToSortableLong format (instead of + // NumericUtils.doubleToSortableLong format (instead of // Double.doubleToRawLongBits) we could do comparisons // in long space