mirror of https://github.com/apache/lucene.git
LUCENE-7073: fix FieldType issues with Points
This commit is contained in:
parent
549e6d7c49
commit
d7ee7c6615
|
@ -35,13 +35,12 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType.LegacyNumericType;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
|
||||
|
@ -119,8 +118,8 @@ public class DocMaker implements Closeable {
|
|||
fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
|
||||
fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));
|
||||
|
||||
numericFields.put(DATE_MSEC_FIELD, new LegacyLongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
|
||||
numericFields.put(TIME_SEC_FIELD, new LegacyIntField(TIME_SEC_FIELD, 0, Field.Store.NO));
|
||||
numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L));
|
||||
numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0));
|
||||
|
||||
doc = new Document();
|
||||
} else {
|
||||
|
@ -148,7 +147,7 @@ public class DocMaker implements Closeable {
|
|||
return f;
|
||||
}
|
||||
|
||||
Field getNumericField(String name, LegacyNumericType type) {
|
||||
Field getNumericField(String name, Class<? extends Number> numericType) {
|
||||
Field f;
|
||||
if (reuseFields) {
|
||||
f = numericFields.get(name);
|
||||
|
@ -157,21 +156,16 @@ public class DocMaker implements Closeable {
|
|||
}
|
||||
|
||||
if (f == null) {
|
||||
switch(type) {
|
||||
case INT:
|
||||
f = new LegacyIntField(name, 0, Field.Store.NO);
|
||||
break;
|
||||
case LONG:
|
||||
f = new LegacyLongField(name, 0L, Field.Store.NO);
|
||||
break;
|
||||
case FLOAT:
|
||||
f = new LegacyFloatField(name, 0.0F, Field.Store.NO);
|
||||
break;
|
||||
case DOUBLE:
|
||||
f = new LegacyDoubleField(name, 0.0, Field.Store.NO);
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Cannot get here");
|
||||
if (numericType.equals(Integer.class)) {
|
||||
f = new IntPoint(name, 0);
|
||||
} else if (numericType.equals(Long.class)) {
|
||||
f = new LongPoint(name, 0L);
|
||||
} else if (numericType.equals(Float.class)) {
|
||||
f = new FloatPoint(name, 0.0F);
|
||||
} else if (numericType.equals(Double.class)) {
|
||||
f = new DoublePoint(name, 0.0);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unsupported numeric type: " + numericType);
|
||||
}
|
||||
if (reuseFields) {
|
||||
numericFields.put(name, f);
|
||||
|
@ -278,14 +272,14 @@ public class DocMaker implements Closeable {
|
|||
date = new Date();
|
||||
}
|
||||
|
||||
Field dateField = ds.getNumericField(DATE_MSEC_FIELD, FieldType.LegacyNumericType.LONG);
|
||||
Field dateField = ds.getNumericField(DATE_MSEC_FIELD, Long.class);
|
||||
dateField.setLongValue(date.getTime());
|
||||
doc.add(dateField);
|
||||
|
||||
util.cal.setTime(date);
|
||||
final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND);
|
||||
|
||||
Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, LegacyNumericType.INT);
|
||||
Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, Integer.class);
|
||||
timeSecField.setIntValue(sec);
|
||||
doc.add(timeSecField);
|
||||
|
||||
|
|
|
@ -26,11 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
|||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
||||
/**
|
||||
|
@ -73,11 +69,8 @@ public class ReadTokensTask extends PerfTask {
|
|||
Analyzer analyzer = getRunData().getAnalyzer();
|
||||
int tokenCount = 0;
|
||||
for(final IndexableField field : fields) {
|
||||
if (!field.fieldType().tokenized() ||
|
||||
field instanceof LegacyIntField ||
|
||||
field instanceof LegacyLongField ||
|
||||
field instanceof LegacyFloatField ||
|
||||
field instanceof LegacyDoubleField) {
|
||||
if (field.fieldType().indexOptions() == IndexOptions.NONE ||
|
||||
field.fieldType().tokenized() == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -194,9 +194,9 @@
|
|||
* </li>
|
||||
* <li>
|
||||
* {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}.
|
||||
* Optional pair of files, recording dimesionally indexed fields, to enable fast
|
||||
* Optional pair of files, recording dimensionally indexed fields, to enable fast
|
||||
* numeric range filtering and large numeric values like BigInteger and BigDecimal (1D)
|
||||
* and geo shape intersection (2D, 3D).
|
||||
* and geographic shape intersection (2D, 3D).
|
||||
* </li>
|
||||
* </ul>
|
||||
* <p>Details on each of these are provided in their linked pages.</p>
|
||||
|
@ -396,6 +396,9 @@
|
|||
* contain the zlib-crc32 checksum of the file.</li>
|
||||
* <li>In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric)
|
||||
* that is suitable for faceting/sorting/analytics.
|
||||
* <li>In version 5.4, DocValues have been improved to store more information on disk:
|
||||
* addresses for binary fields and ord indexes for multi-valued fields.
|
||||
* <li>In version 6.0, Points were added, for multi-dimensional range/distance search.
|
||||
* </li>
|
||||
* </ul>
|
||||
* <a name="Limitations"></a>
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer; // javadocs
|
|||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableFieldType;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
|
||||
/**
|
||||
|
@ -75,8 +76,8 @@ public class FieldType implements IndexableFieldType {
|
|||
this.numericType = ref.numericType();
|
||||
this.numericPrecisionStep = ref.numericPrecisionStep();
|
||||
this.docValuesType = ref.docValuesType();
|
||||
this.dimensionCount = dimensionCount;
|
||||
this.dimensionNumBytes = dimensionNumBytes;
|
||||
this.dimensionCount = ref.dimensionCount;
|
||||
this.dimensionNumBytes = ref.dimensionNumBytes;
|
||||
// Do not copy frozen!
|
||||
}
|
||||
|
||||
|
@ -365,18 +366,24 @@ public class FieldType implements IndexableFieldType {
|
|||
*/
|
||||
public void setDimensions(int dimensionCount, int dimensionNumBytes) {
|
||||
if (dimensionCount < 0) {
|
||||
throw new IllegalArgumentException("pointDimensionCount must be >= 0; got " + dimensionCount);
|
||||
throw new IllegalArgumentException("dimensionCount must be >= 0; got " + dimensionCount);
|
||||
}
|
||||
if (dimensionCount > PointValues.MAX_DIMENSIONS) {
|
||||
throw new IllegalArgumentException("dimensionCount must be <= " + PointValues.MAX_DIMENSIONS + "; got " + dimensionCount);
|
||||
}
|
||||
if (dimensionNumBytes < 0) {
|
||||
throw new IllegalArgumentException("pointNumBytes must be >= 0; got " + dimensionNumBytes);
|
||||
throw new IllegalArgumentException("dimensionNumBytes must be >= 0; got " + dimensionNumBytes);
|
||||
}
|
||||
// Upper bound on the per-dimension byte width: the value under test must be
// dimensionNumBytes (not dimensionCount, which is bounded by MAX_DIMENSIONS above),
// matching the error message below.
if (dimensionNumBytes > PointValues.MAX_NUM_BYTES) {
  throw new IllegalArgumentException("dimensionNumBytes must be <= " + PointValues.MAX_NUM_BYTES + "; got " + dimensionNumBytes);
}
|
||||
if (dimensionCount == 0) {
|
||||
if (dimensionNumBytes != 0) {
|
||||
throw new IllegalArgumentException("when pointDimensionCount is 0 pointNumBytes must 0; got " + dimensionNumBytes);
|
||||
throw new IllegalArgumentException("when dimensionCount is 0, dimensionNumBytes must 0; got " + dimensionNumBytes);
|
||||
}
|
||||
} else if (dimensionNumBytes == 0) {
|
||||
if (dimensionCount != 0) {
|
||||
throw new IllegalArgumentException("when pointNumBytes is 0 pointDimensionCount must 0; got " + dimensionCount);
|
||||
throw new IllegalArgumentException("when dimensionNumBytes is 0, dimensionCount must 0; got " + dimensionCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -484,6 +491,8 @@ public class FieldType implements IndexableFieldType {
|
|||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + dimensionCount;
|
||||
result = prime * result + dimensionNumBytes;
|
||||
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
|
||||
result = prime * result + indexOptions.hashCode();
|
||||
result = prime * result + numericPrecisionStep;
|
||||
|
@ -504,6 +513,8 @@ public class FieldType implements IndexableFieldType {
|
|||
if (obj == null) return false;
|
||||
if (getClass() != obj.getClass()) return false;
|
||||
FieldType other = (FieldType) obj;
|
||||
if (dimensionCount != other.dimensionCount) return false;
|
||||
if (dimensionNumBytes != other.dimensionNumBytes) return false;
|
||||
if (docValuesType != other.docValuesType) return false;
|
||||
if (indexOptions != other.indexOptions) return false;
|
||||
if (numericPrecisionStep != other.numericPrecisionStep) return false;
|
||||
|
@ -517,5 +528,4 @@ public class FieldType implements IndexableFieldType {
|
|||
if (tokenized != other.tokenized) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -31,10 +31,10 @@ import org.apache.lucene.index.DocValuesType;
|
|||
*
|
||||
* <p>
|
||||
* Note that if you want to encode doubles or floats with proper sort order,
|
||||
* you will need to encode them with {@link org.apache.lucene.util.LegacyNumericUtils}:
|
||||
* you will need to encode them with {@link org.apache.lucene.util.NumericUtils}:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* document.add(new SortedNumericDocValuesField(name, LegacyNumericUtils.floatToSortableInt(-5.3f)));
|
||||
* document.add(new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(-5.3f)));
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.lang.reflect.Modifier;
|
|||
import org.apache.lucene.document.FieldType.LegacyNumericType;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
@ -70,6 +71,10 @@ public class TestFieldType extends LuceneTestCase {
|
|||
FieldType ft10 = new FieldType();
|
||||
ft10.setStoreTermVectors(true);
|
||||
assertFalse(ft10.equals(ft));
|
||||
|
||||
FieldType ft11 = new FieldType();
|
||||
ft11.setDimensions(1, 4);
|
||||
assertFalse(ft11.equals(ft));
|
||||
}
|
||||
|
||||
public void testPointsToString() {
|
||||
|
@ -90,14 +95,16 @@ public class TestFieldType extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private static FieldType randomFieldType() throws Exception {
|
||||
// setDimensions handled special as values must be in-bounds.
|
||||
Method setDimensionsMethod = FieldType.class.getMethod("setDimensions", int.class, int.class);
|
||||
FieldType ft = new FieldType();
|
||||
for (Method method : FieldType.class.getMethods()) {
|
||||
if ((method.getModifiers() & Modifier.PUBLIC) != 0 && method.getName().startsWith("set")) {
|
||||
if (method.getName().startsWith("set")) {
|
||||
final Class<?>[] parameterTypes = method.getParameterTypes();
|
||||
final Object[] args = new Object[parameterTypes.length];
|
||||
if (method.getName().equals("setPointDimensions")) {
|
||||
args[0] = 1 + random().nextInt(15);
|
||||
args[1] = 1 + random().nextInt(100);
|
||||
if (method.equals(setDimensionsMethod)) {
|
||||
args[0] = 1 + random().nextInt(PointValues.MAX_DIMENSIONS);
|
||||
args[1] = 1 + random().nextInt(PointValues.MAX_NUM_BYTES);
|
||||
} else {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
args[i] = randomValue(parameterTypes[i]);
|
||||
|
|
|
@ -385,9 +385,8 @@ public class TestPointValues extends LuceneTestCase {
|
|||
for(int i=0;i<values.length;i++) {
|
||||
values[i] = new byte[4];
|
||||
}
|
||||
doc.add(new BinaryPoint("dim", values));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
w.addDocument(doc);
|
||||
doc.add(new BinaryPoint("dim", values));
|
||||
});
|
||||
|
||||
Document doc2 = new Document();
|
||||
|
|
|
@ -32,7 +32,6 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
|
||||
/** Represents a range over double values.
|
||||
|
@ -50,7 +49,7 @@ public final class DoubleRange extends Range {
|
|||
super(label);
|
||||
|
||||
// TODO: if DoubleDocValuesField used
|
||||
// LegacyNumericUtils.doubleToSortableLong format (instead of
|
||||
// NumericUtils.doubleToSortableLong format (instead of
|
||||
// Double.doubleToRawLongBits) we could do comparisons
|
||||
// in long space
|
||||
|
||||
|
|
Loading…
Reference in New Issue