LUCENE-6766: add float, double

This commit is contained in:
Mike McCandless 2016-05-07 18:36:13 -04:00
parent b62cad334c
commit a30c2632c8
6 changed files with 313 additions and 19 deletions

View File

@ -168,6 +168,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "int": case "int":
type = SortField.Type.INT; type = SortField.Type.INT;
break; break;
case "double":
type = SortField.Type.DOUBLE;
break;
case "float":
type = SortField.Type.FLOAT;
break;
default: default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input); throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
} }
@ -216,6 +222,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
break; break;
} }
break; break;
case DOUBLE:
switch (missingLastAsString) {
case "null":
missingValue = null;
break;
default:
missingValue = Double.parseDouble(missingLastAsString);
break;
}
break;
case FLOAT:
switch (missingLastAsString) {
case "null":
missingValue = null;
break;
default:
missingValue = Float.parseFloat(missingLastAsString);
break;
}
break;
// nocommit need the rest // nocommit need the rest
default: default:
throw new AssertionError(); throw new AssertionError();
@ -338,6 +364,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case INT: case INT:
sortType = "int"; sortType = "int";
break; break;
case DOUBLE:
sortType = "double";
break;
case FLOAT:
sortType = "float";
break;
// nocommit the rest: // nocommit the rest:
default: default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@ -358,10 +390,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
missing = "first"; missing = "first";
} else if (missingValue == SortField.STRING_LAST) { } else if (missingValue == SortField.STRING_LAST) {
missing = "last"; missing = "last";
} else if (missingValue instanceof Long) {
missing = Long.toString((Long) missingValue);
} else { } else {
throw new IllegalStateException("Unexpected missing sort value: " + missingValue); missing = missingValue.toString();
} }
SimpleTextUtil.write(output, missing, scratch); SimpleTextUtil.write(output, missing, scratch);
SimpleTextUtil.writeNewline(output); SimpleTextUtil.writeNewline(output);

View File

@ -121,6 +121,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 2: case 2:
sortType = SortField.Type.INT; sortType = SortField.Type.INT;
break; break;
case 3:
sortType = SortField.Type.DOUBLE;
break;
case 4:
sortType = SortField.Type.FLOAT;
break;
default: default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input); throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
} }
@ -163,6 +169,18 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
} }
missingValue = input.readInt(); missingValue = input.readInt();
break; break;
case DOUBLE:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Double.longBitsToDouble(input.readLong());
break;
case FLOAT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Float.intBitsToFloat(input.readInt());
break;
default: default:
throw new AssertionError("unhandled sortType=" + sortType); throw new AssertionError("unhandled sortType=" + sortType);
} }
@ -240,6 +258,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case INT: case INT:
sortTypeID = 2; sortTypeID = 2;
break; break;
case DOUBLE:
sortTypeID = 3;
break;
case FLOAT:
sortTypeID = 4;
break;
// nocommit the rest: // nocommit the rest:
default: default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@ -270,6 +294,14 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeByte((byte) 1); output.writeByte((byte) 1);
output.writeInt(((Integer) missingValue).intValue()); output.writeInt(((Integer) missingValue).intValue());
break; break;
case DOUBLE:
output.writeByte((byte) 1);
output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
break;
case FLOAT:
  // Flag byte 1 marks that a missing value follows.
  output.writeByte((byte) 1);
  // Must be written as an int: the reader decodes this value with
  // Float.intBitsToFloat(input.readInt()), so writing a long here would
  // leave the reader misaligned for everything that follows.
  output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
  break;
// nocommit the rest: // nocommit the rest:
default: default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); throw new IllegalStateException("Unexpected sort type: " + sortField.getType());

View File

@ -648,6 +648,7 @@ public final class CheckIndex implements Closeable {
int toLoseDocCount = info.info.maxDoc(); int toLoseDocCount = info.info.maxDoc();
SegmentReader reader = null; SegmentReader reader = null;
Sort previousIndexSort = null;
try { try {
msg(infoStream, " version=" + (version == null ? "3.0" : version)); msg(infoStream, " version=" + (version == null ? "3.0" : version));
@ -661,6 +662,13 @@ public final class CheckIndex implements Closeable {
Sort indexSort = info.info.getIndexSort(); Sort indexSort = info.info.getIndexSort();
if (indexSort != null) { if (indexSort != null) {
msg(infoStream, " sort=" + indexSort); msg(infoStream, " sort=" + indexSort);
if (previousIndexSort != null) {
if (previousIndexSort.equals(indexSort) == false) {
throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
}
} else {
previousIndexSort = indexSort;
}
} }
segInfoStat.numFiles = info.files().size(); segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
@ -835,8 +843,6 @@ public final class CheckIndex implements Closeable {
for (int i = 0; i < fields.length; i++) { for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1; reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext); comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
// nocommit we prevent SCORE?
//comparators[i].setScorer(FAKESCORER);
} }
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
@ -2585,9 +2591,6 @@ public final class CheckIndex implements Closeable {
} }
} }
// nocommit must check index is sorted, if it claims to be
// nocommit must check that all segments have the same sort, if any segment is sorted
/** /**
* Parse command line args into fields * Parse command line args into fields
* @param args The command line arguments * @param args The command line arguments

View File

@ -164,6 +164,51 @@ final class MultiSorter {
}; };
} }
case LONG:
  {
    // One entry per reader: the numeric doc values for the sort field and the
    // bits telling which documents actually have a value.
    List<NumericDocValues> values = new ArrayList<>();
    List<Bits> docsWithFields = new ArrayList<>();
    for (CodecReader reader : readers) {
      values.add(DocValues.getNumeric(reader, sortField.getField()));
      docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
    }

    final int reverseMul;
    if (sortField.getReverse()) {
      reverseMul = -1;
    } else {
      reverseMul = 1;
    }

    // The missing value of a LONG sort is a Long; it must be kept at full
    // 64-bit width (casting to Integer would fail, and an int would truncate).
    final long missingValue;
    if (sortField.getMissingValue() != null) {
      missingValue = (Long) sortField.getMissingValue();
    } else {
      missingValue = 0L;
    }

    return new CrossReaderComparator() {
      @Override
      public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
        // Compare the raw long values; no narrowing cast, so values outside
        // the int range keep their correct order.
        long valueA;
        if (docsWithFields.get(readerIndexA).get(docIDA)) {
          valueA = values.get(readerIndexA).get(docIDA);
        } else {
          valueA = missingValue;
        }

        long valueB;
        if (docsWithFields.get(readerIndexB).get(docIDB)) {
          valueB = values.get(readerIndexB).get(docIDB);
        } else {
          valueB = missingValue;
        }
        return reverseMul * Long.compare(valueA, valueB);
      }
    };
  }
case INT: case INT:
{ {
List<NumericDocValues> values = new ArrayList<>(); List<NumericDocValues> values = new ArrayList<>();
@ -208,8 +253,8 @@ final class MultiSorter {
} }
}; };
} }
case LONG:
// nocommit refactor/share at least numerics here: case DOUBLE:
{ {
List<NumericDocValues> values = new ArrayList<>(); List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>(); List<Bits> docsWithFields = new ArrayList<>();
@ -225,34 +270,80 @@ final class MultiSorter {
reverseMul = 1; reverseMul = 1;
} }
final int missingValue; final double missingValue;
if (sortField.getMissingValue() != null) { if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue(); missingValue = (Double) sortField.getMissingValue();
} else { } else {
missingValue = 0; missingValue = 0.0;
} }
return new CrossReaderComparator() { return new CrossReaderComparator() {
@Override @Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
long valueA; double valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) { if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = (int) values.get(readerIndexA).get(docIDA); valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
} else { } else {
valueA = missingValue; valueA = missingValue;
} }
long valueB; double valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) { if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = (int) values.get(readerIndexB).get(docIDB); valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
} else { } else {
valueB = missingValue; valueB = missingValue;
} }
return reverseMul * Long.compare(valueA, valueB); return reverseMul * Double.compare(valueA, valueB);
} }
}; };
} }
case FLOAT:
  {
    // Gather, per reader, the numeric doc values of the sort field together
    // with the bits marking which documents carry a value.
    final String field = sortField.getField();
    final List<NumericDocValues> perReaderValues = new ArrayList<>();
    final List<Bits> perReaderHasValue = new ArrayList<>();
    for (CodecReader codecReader : readers) {
      perReaderValues.add(DocValues.getNumeric(codecReader, field));
      perReaderHasValue.add(DocValues.getDocsWithField(codecReader, field));
    }

    // -1 flips the comparison for a reversed sort.
    final int direction = sortField.getReverse() ? -1 : 1;

    // Documents without a value sort as the configured missing value, or 0.0f if none is set.
    final Object configuredMissing = sortField.getMissingValue();
    final float missingValue = configuredMissing == null ? 0.0f : (Float) configuredMissing;

    return new CrossReaderComparator() {
      @Override
      public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
        final float left = valueOrMissing(readerIndexA, docIDA);
        final float right = valueOrMissing(readerIndexB, docIDB);
        return direction * Float.compare(left, right);
      }

      // Decodes the float stored as int bits for the document, or returns the missing value.
      private float valueOrMissing(int readerIndex, int docID) {
        if (perReaderHasValue.get(readerIndex).get(docID) == false) {
          return missingValue;
        }
        return Float.intBitsToFloat((int) perReaderValues.get(readerIndex).get(docID));
      }
    };
  }
// nocommit do the rest: // nocommit do the rest:
default: default:
throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType()); throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());

View File

@ -840,7 +840,6 @@ class SortingLeafReader extends FilterLeafReader {
if (inPointValues == null) { if (inPointValues == null) {
return null; return null;
} else { } else {
// nocommit make sure this is tested
return new SortingPointValues(inPointValues, docMap); return new SortingPointValues(inPointValues, docMap);
} }
} }

View File

@ -36,9 +36,11 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.BinaryPoint; import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField;
@ -72,6 +74,7 @@ import org.junit.BeforeClass;
// nocommit test tie break // nocommit test tie break
// nocommit test multiple sorts // nocommit test multiple sorts
// nocommit test update dvs // nocommit test update dvs
// nocommit test missing value
// nocommit test EarlyTerminatingCollector // nocommit test EarlyTerminatingCollector
@ -113,6 +116,142 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close(); dir.close();
} }
/**
 * Adds three documents with the numeric doc values 18, -1 and 7 in field "foo", using an index
 * sort of type LONG on that field, and checks that after {@code forceMerge(1)} the single leaf
 * exposes the values in ascending order.
 */
public void testBasicLong() throws Exception {
  // try-with-resources: reader, writer and directory are closed (in that order)
  // even when one of the assertions below fails.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
    iwc.setIndexSort(indexSort);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("foo", 18));
      w.addDocument(doc);
      // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
      w.commit();

      doc = new Document();
      doc.add(new NumericDocValuesField("foo", -1));
      w.addDocument(doc);
      w.commit();

      doc = new Document();
      doc.add(new NumericDocValuesField("foo", 7));
      w.addDocument(doc);
      w.forceMerge(1);

      try (DirectoryReader r = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(r);
        assertEquals(3, leaf.maxDoc());
        NumericDocValues values = leaf.getNumericDocValues("foo");
        assertEquals(-1, values.get(0));
        assertEquals(7, values.get(1));
        assertEquals(18, values.get(2));
      }
    }
  }
}
/**
 * Adds three documents with the numeric doc values 18, -1 and 7 in field "foo", using an index
 * sort of type INT on that field, and checks that after {@code forceMerge(1)} the single leaf
 * exposes the values in ascending order.
 */
public void testBasicInt() throws Exception {
  // try-with-resources: reader, writer and directory are closed (in that order)
  // even when one of the assertions below fails.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
    iwc.setIndexSort(indexSort);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("foo", 18));
      w.addDocument(doc);
      // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
      w.commit();

      doc = new Document();
      doc.add(new NumericDocValuesField("foo", -1));
      w.addDocument(doc);
      w.commit();

      doc = new Document();
      doc.add(new NumericDocValuesField("foo", 7));
      w.addDocument(doc);
      w.forceMerge(1);

      try (DirectoryReader r = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(r);
        assertEquals(3, leaf.maxDoc());
        NumericDocValues values = leaf.getNumericDocValues("foo");
        assertEquals(-1, values.get(0));
        assertEquals(7, values.get(1));
        assertEquals(18, values.get(2));
      }
    }
  }
}
/**
 * Adds three documents with the double doc values 18.0, -1.0 and 7.0 in field "foo", using an
 * index sort of type DOUBLE on that field, and checks that after {@code forceMerge(1)} the
 * single leaf exposes the values in ascending order.
 */
public void testBasicDouble() throws Exception {
  // try-with-resources: reader, writer and directory are closed (in that order)
  // even when one of the assertions below fails.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
    iwc.setIndexSort(indexSort);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new DoubleDocValuesField("foo", 18.0));
      w.addDocument(doc);
      // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
      w.commit();

      doc = new Document();
      doc.add(new DoubleDocValuesField("foo", -1.0));
      w.addDocument(doc);
      w.commit();

      doc = new Document();
      doc.add(new DoubleDocValuesField("foo", 7.0));
      w.addDocument(doc);
      w.forceMerge(1);

      try (DirectoryReader r = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(r);
        assertEquals(3, leaf.maxDoc());
        NumericDocValues values = leaf.getNumericDocValues("foo");
        // The values are read back as long bits and decoded to double before comparing.
        assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
        assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
        assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
      }
    }
  }
}
/**
 * Adds three documents with the float doc values 18.0f, -1.0f and 7.0f in field "foo", using an
 * index sort of type FLOAT on that field, and checks that after {@code forceMerge(1)} the
 * single leaf exposes the values in ascending order.
 */
public void testBasicFloat() throws Exception {
  // try-with-resources: reader, writer and directory are closed (in that order)
  // even when one of the assertions below fails.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
    iwc.setIndexSort(indexSort);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new FloatDocValuesField("foo", 18.0f));
      w.addDocument(doc);
      // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
      w.commit();

      doc = new Document();
      doc.add(new FloatDocValuesField("foo", -1.0f));
      w.addDocument(doc);
      w.commit();

      doc = new Document();
      doc.add(new FloatDocValuesField("foo", 7.0f));
      w.addDocument(doc);
      w.forceMerge(1);

      try (DirectoryReader r = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(r);
        assertEquals(3, leaf.maxDoc());
        NumericDocValues values = leaf.getNumericDocValues("foo");
        // The values are read back as int bits (held in a long) and decoded to float before comparing.
        assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
        assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
        assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
      }
    }
  }
}
public void testSortOnMerge(boolean withDeletes) throws IOException { public void testSortOnMerge(boolean withDeletes) throws IOException {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));