LUCENE-7073: fix FieldType issues with Points

This commit is contained in:
Robert Muir 2016-03-07 16:12:15 -05:00
parent 549e6d7c49
commit d7ee7c6615
8 changed files with 59 additions and 54 deletions

View File

@ -35,13 +35,12 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType.LegacyNumericType;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@ -119,8 +118,8 @@ public class DocMaker implements Closeable {
fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));
numericFields.put(DATE_MSEC_FIELD, new LegacyLongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
numericFields.put(TIME_SEC_FIELD, new LegacyIntField(TIME_SEC_FIELD, 0, Field.Store.NO));
numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L));
numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0));
doc = new Document();
} else {
@ -148,7 +147,7 @@ public class DocMaker implements Closeable {
return f;
}
Field getNumericField(String name, LegacyNumericType type) {
Field getNumericField(String name, Class<? extends Number> numericType) {
Field f;
if (reuseFields) {
f = numericFields.get(name);
@ -157,21 +156,16 @@ public class DocMaker implements Closeable {
}
if (f == null) {
switch(type) {
case INT:
f = new LegacyIntField(name, 0, Field.Store.NO);
break;
case LONG:
f = new LegacyLongField(name, 0L, Field.Store.NO);
break;
case FLOAT:
f = new LegacyFloatField(name, 0.0F, Field.Store.NO);
break;
case DOUBLE:
f = new LegacyDoubleField(name, 0.0, Field.Store.NO);
break;
default:
throw new AssertionError("Cannot get here");
if (numericType.equals(Integer.class)) {
f = new IntPoint(name, 0);
} else if (numericType.equals(Long.class)) {
f = new LongPoint(name, 0L);
} else if (numericType.equals(Float.class)) {
f = new FloatPoint(name, 0.0F);
} else if (numericType.equals(Double.class)) {
f = new DoublePoint(name, 0.0);
} else {
throw new UnsupportedOperationException("Unsupported numeric type: " + numericType);
}
if (reuseFields) {
numericFields.put(name, f);
@ -278,14 +272,14 @@ public class DocMaker implements Closeable {
date = new Date();
}
Field dateField = ds.getNumericField(DATE_MSEC_FIELD, FieldType.LegacyNumericType.LONG);
Field dateField = ds.getNumericField(DATE_MSEC_FIELD, Long.class);
dateField.setLongValue(date.getTime());
doc.add(dateField);
util.cal.setTime(date);
final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND);
Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, LegacyNumericType.INT);
Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, Integer.class);
timeSecField.setIntValue(sec);
doc.add(timeSecField);

View File

@ -26,11 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
/**
@ -73,11 +69,8 @@ public class ReadTokensTask extends PerfTask {
Analyzer analyzer = getRunData().getAnalyzer();
int tokenCount = 0;
for(final IndexableField field : fields) {
if (!field.fieldType().tokenized() ||
field instanceof LegacyIntField ||
field instanceof LegacyLongField ||
field instanceof LegacyFloatField ||
field instanceof LegacyDoubleField) {
if (field.fieldType().indexOptions() == IndexOptions.NONE ||
field.fieldType().tokenized() == false) {
continue;
}

View File

@ -194,9 +194,9 @@
* </li>
* <li>
* {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}.
* Optional pair of files, recording dimesionally indexed fields, to enable fast
* Optional pair of files, recording dimensionally indexed fields, to enable fast
* numeric range filtering and large numeric values like BigInteger and BigDecimal (1D)
* and geo shape intersection (2D, 3D).
* and geographic shape intersection (2D, 3D).
* </li>
* </ul>
* <p>Details on each of these are provided in their linked pages.</p>
@ -396,6 +396,9 @@
* contain the zlib-crc32 checksum of the file.</li>
* <li>In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric)
* that is suitable for faceting/sorting/analytics.</li>
* <li>In version 5.4, DocValues have been improved to store more information on disk:
* addresses for binary fields and ord indexes for multi-valued fields.</li>
* <li>In version 6.0, Points were added, for multi-dimensional range/distance search.
* </li>
* </ul>
* <a name="Limitations"></a>

View File

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.LegacyNumericUtils;
/**
@ -75,8 +76,8 @@ public class FieldType implements IndexableFieldType {
this.numericType = ref.numericType();
this.numericPrecisionStep = ref.numericPrecisionStep();
this.docValuesType = ref.docValuesType();
this.dimensionCount = dimensionCount;
this.dimensionNumBytes = dimensionNumBytes;
this.dimensionCount = ref.dimensionCount;
this.dimensionNumBytes = ref.dimensionNumBytes;
// Do not copy frozen!
}
@ -365,18 +366,24 @@ public class FieldType implements IndexableFieldType {
*/
public void setDimensions(int dimensionCount, int dimensionNumBytes) {
if (dimensionCount < 0) {
throw new IllegalArgumentException("pointDimensionCount must be >= 0; got " + dimensionCount);
throw new IllegalArgumentException("dimensionCount must be >= 0; got " + dimensionCount);
}
if (dimensionCount > PointValues.MAX_DIMENSIONS) {
throw new IllegalArgumentException("dimensionCount must be <= " + PointValues.MAX_DIMENSIONS + "; got " + dimensionCount);
}
if (dimensionNumBytes < 0) {
throw new IllegalArgumentException("pointNumBytes must be >= 0; got " + dimensionNumBytes);
throw new IllegalArgumentException("dimensionNumBytes must be >= 0; got " + dimensionNumBytes);
}
// Bound the per-dimension byte width (not the dimension count) by MAX_NUM_BYTES,
// matching the value reported in the error message.
if (dimensionNumBytes > PointValues.MAX_NUM_BYTES) {
throw new IllegalArgumentException("dimensionNumBytes must be <= " + PointValues.MAX_NUM_BYTES + "; got " + dimensionNumBytes);
}
if (dimensionCount == 0) {
if (dimensionNumBytes != 0) {
throw new IllegalArgumentException("when pointDimensionCount is 0 pointNumBytes must 0; got " + dimensionNumBytes);
// Reached when dimensionCount is 0 but a non-zero byte width was supplied.
throw new IllegalArgumentException("when dimensionCount is 0, dimensionNumBytes must be 0; got " + dimensionNumBytes);
}
} else if (dimensionNumBytes == 0) {
if (dimensionCount != 0) {
throw new IllegalArgumentException("when pointNumBytes is 0 pointDimensionCount must 0; got " + dimensionCount);
// Reached when dimensionNumBytes is 0 but a non-zero dimension count was supplied.
throw new IllegalArgumentException("when dimensionNumBytes is 0, dimensionCount must be 0; got " + dimensionCount);
}
}
@ -484,6 +491,8 @@ public class FieldType implements IndexableFieldType {
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + dimensionCount;
result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + indexOptions.hashCode();
result = prime * result + numericPrecisionStep;
@ -504,6 +513,8 @@ public class FieldType implements IndexableFieldType {
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
FieldType other = (FieldType) obj;
if (dimensionCount != other.dimensionCount) return false;
if (dimensionNumBytes != other.dimensionNumBytes) return false;
if (docValuesType != other.docValuesType) return false;
if (indexOptions != other.indexOptions) return false;
if (numericPrecisionStep != other.numericPrecisionStep) return false;
@ -517,5 +528,4 @@ public class FieldType implements IndexableFieldType {
if (tokenized != other.tokenized) return false;
return true;
}
}

View File

@ -31,10 +31,10 @@ import org.apache.lucene.index.DocValuesType;
*
* <p>
* Note that if you want to encode doubles or floats with proper sort order,
* you will need to encode them with {@link org.apache.lucene.util.LegacyNumericUtils}:
* you will need to encode them with {@link org.apache.lucene.util.NumericUtils}:
*
* <pre class="prettyprint">
* document.add(new SortedNumericDocValuesField(name, LegacyNumericUtils.floatToSortableInt(-5.3f)));
* document.add(new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(-5.3f)));
* </pre>
*
* <p>

View File

@ -23,6 +23,7 @@ import java.lang.reflect.Modifier;
import org.apache.lucene.document.FieldType.LegacyNumericType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.LuceneTestCase;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@ -70,6 +71,10 @@ public class TestFieldType extends LuceneTestCase {
FieldType ft10 = new FieldType();
ft10.setStoreTermVectors(true);
assertFalse(ft10.equals(ft));
FieldType ft11 = new FieldType();
ft11.setDimensions(1, 4);
assertFalse(ft11.equals(ft));
}
public void testPointsToString() {
@ -90,14 +95,16 @@ public class TestFieldType extends LuceneTestCase {
}
private static FieldType randomFieldType() throws Exception {
// setDimensions handled special as values must be in-bounds.
Method setDimensionsMethod = FieldType.class.getMethod("setDimensions", int.class, int.class);
FieldType ft = new FieldType();
for (Method method : FieldType.class.getMethods()) {
if ((method.getModifiers() & Modifier.PUBLIC) != 0 && method.getName().startsWith("set")) {
if (method.getName().startsWith("set")) {
final Class<?>[] parameterTypes = method.getParameterTypes();
final Object[] args = new Object[parameterTypes.length];
if (method.getName().equals("setPointDimensions")) {
args[0] = 1 + random().nextInt(15);
args[1] = 1 + random().nextInt(100);
if (method.equals(setDimensionsMethod)) {
args[0] = 1 + random().nextInt(PointValues.MAX_DIMENSIONS);
args[1] = 1 + random().nextInt(PointValues.MAX_NUM_BYTES);
} else {
for (int i = 0; i < args.length; ++i) {
args[i] = randomValue(parameterTypes[i]);

View File

@ -385,9 +385,8 @@ public class TestPointValues extends LuceneTestCase {
for(int i=0;i<values.length;i++) {
values[i] = new byte[4];
}
doc.add(new BinaryPoint("dim", values));
expectThrows(IllegalArgumentException.class, () -> {
w.addDocument(doc);
doc.add(new BinaryPoint("dim", values));
});
Document doc2 = new Document();

View File

@ -32,7 +32,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.NumericUtils;
/** Represents a range over double values.
@ -50,7 +49,7 @@ public final class DoubleRange extends Range {
super(label);
// TODO: if DoubleDocValuesField used
// LegacyNumericUtils.doubleToSortableLong format (instead of
// NumericUtils.doubleToSortableLong format (instead of
// Double.doubleToRawLongBits) we could do comparisons
// in long space