LUCENE-6766: add float, double

This commit is contained in:
Mike McCandless 2016-05-07 18:36:13 -04:00
parent 231cb3eb42
commit 8098a911be
6 changed files with 313 additions and 19 deletions

View File

@ -168,6 +168,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "int":
type = SortField.Type.INT;
break;
case "double":
type = SortField.Type.DOUBLE;
break;
case "float":
type = SortField.Type.FLOAT;
break;
default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
}
@ -216,6 +222,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
break;
}
break;
case DOUBLE:
switch (missingLastAsString) {
case "null":
missingValue = null;
break;
default:
missingValue = Double.parseDouble(missingLastAsString);
break;
}
break;
case FLOAT:
switch (missingLastAsString) {
case "null":
missingValue = null;
break;
default:
missingValue = Float.parseFloat(missingLastAsString);
break;
}
break;
// nocommit need the rest
default:
throw new AssertionError();
@ -338,6 +364,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case INT:
sortType = "int";
break;
case DOUBLE:
sortType = "double";
break;
case FLOAT:
sortType = "float";
break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@ -358,10 +390,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
missing = "first";
} else if (missingValue == SortField.STRING_LAST) {
missing = "last";
} else if (missingValue instanceof Long) {
missing = Long.toString((Long) missingValue);
} else {
throw new IllegalStateException("Unexpected missing sort value: " + missingValue);
missing = missingValue.toString();
}
SimpleTextUtil.write(output, missing, scratch);
SimpleTextUtil.writeNewline(output);

View File

@ -121,6 +121,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 2:
sortType = SortField.Type.INT;
break;
case 3:
sortType = SortField.Type.DOUBLE;
break;
case 4:
sortType = SortField.Type.FLOAT;
break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
@ -163,6 +169,18 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
}
missingValue = input.readInt();
break;
case DOUBLE:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Double.longBitsToDouble(input.readLong());
break;
case FLOAT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Float.intBitsToFloat(input.readInt());
break;
default:
throw new AssertionError("unhandled sortType=" + sortType);
}
@ -240,6 +258,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case INT:
sortTypeID = 2;
break;
case DOUBLE:
sortTypeID = 3;
break;
case FLOAT:
sortTypeID = 4;
break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@ -270,6 +294,14 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeByte((byte) 1);
output.writeInt(((Integer) missingValue).intValue());
break;
case DOUBLE:
output.writeByte((byte) 1);
output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
break;
case FLOAT:
output.writeByte((byte) 1);
output.writeLong(Float.floatToIntBits(((Float) missingValue).floatValue()));
break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());

View File

@ -648,6 +648,7 @@ public final class CheckIndex implements Closeable {
int toLoseDocCount = info.info.maxDoc();
SegmentReader reader = null;
Sort previousIndexSort = null;
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
@ -661,6 +662,13 @@ public final class CheckIndex implements Closeable {
Sort indexSort = info.info.getIndexSort();
if (indexSort != null) {
msg(infoStream, " sort=" + indexSort);
if (previousIndexSort != null) {
if (previousIndexSort.equals(indexSort) == false) {
throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
}
} else {
previousIndexSort = indexSort;
}
}
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
@ -835,8 +843,6 @@ public final class CheckIndex implements Closeable {
for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
// nocommit we prevent SCORE?
//comparators[i].setScorer(FAKESCORER);
}
int maxDoc = reader.maxDoc();
@ -2585,9 +2591,6 @@ public final class CheckIndex implements Closeable {
}
}
// nocommit must check index is sorted, if it claims to be
// nocommit must check that all segments have the same sort, if any segment is sorted
/**
* Parse command line args into fields
* @param args The command line arguments

View File

@ -164,6 +164,51 @@ final class MultiSorter {
};
}
case LONG:
{
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
values.add(DocValues.getNumeric(reader, sortField.getField()));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final int missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
} else {
missingValue = 0;
}
return new CrossReaderComparator() {
@Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
long valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = (int) values.get(readerIndexA).get(docIDA);
} else {
valueA = missingValue;
}
long valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = (int) values.get(readerIndexB).get(docIDB);
} else {
valueB = missingValue;
}
return reverseMul * Long.compare(valueA, valueB);
}
};
}
case INT:
{
List<NumericDocValues> values = new ArrayList<>();
@ -208,8 +253,8 @@ final class MultiSorter {
}
};
}
case LONG:
// nocommit refactor/share at least numerics here:
case DOUBLE:
{
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
@ -225,34 +270,80 @@ final class MultiSorter {
reverseMul = 1;
}
final int missingValue;
final double missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
missingValue = (Double) sortField.getMissingValue();
} else {
missingValue = 0;
missingValue = 0.0;
}
return new CrossReaderComparator() {
@Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
long valueA;
double valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = (int) values.get(readerIndexA).get(docIDA);
valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
} else {
valueA = missingValue;
}
long valueB;
double valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = (int) values.get(readerIndexB).get(docIDB);
valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
} else {
valueB = missingValue;
}
return reverseMul * Long.compare(valueA, valueB);
return reverseMul * Double.compare(valueA, valueB);
}
};
}
case FLOAT:
{
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
values.add(DocValues.getNumeric(reader, sortField.getField()));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final float missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Float) sortField.getMissingValue();
} else {
missingValue = 0.0f;
}
return new CrossReaderComparator() {
@Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
float valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA));
} else {
valueA = missingValue;
}
float valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB));
} else {
valueB = missingValue;
}
return reverseMul * Float.compare(valueA, valueB);
}
};
}
// nocommit do the rest:
default:
throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());

View File

@ -840,7 +840,6 @@ class SortingLeafReader extends FilterLeafReader {
if (inPointValues == null) {
return null;
} else {
// nocommit make sure this is tested
return new SortingPointValues(inPointValues, docMap);
}
}

View File

@ -36,9 +36,11 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@ -72,6 +74,7 @@ import org.junit.BeforeClass;
// nocommit test tie break
// nocommit test multiple sorts
// nocommit test update dvs
// nocommit test missing value
// nocommit test EarlyTerminatingCollector
@ -113,6 +116,142 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
public void testBasicLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", -1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(-1, values.get(0));
assertEquals(7, values.get(1));
assertEquals(18, values.get(2));
r.close();
w.close();
dir.close();
}
public void testBasicInt() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", -1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(-1, values.get(0));
assertEquals(7, values.get(1));
assertEquals(18, values.get(2));
r.close();
w.close();
dir.close();
}
public void testBasicDouble() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", -1.0));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
r.close();
w.close();
dir.close();
}
public void testBasicFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", -1.0f));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
r.close();
w.close();
dir.close();
}
public void testSortOnMerge(boolean withDeletes) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));