LUCENE_6766: add missing first/last tests

This commit is contained in:
Mike McCandless 2016-05-08 06:41:55 -04:00
parent da473399a3
commit 78a5501063
4 changed files with 383 additions and 19 deletions

View File

@ -299,7 +299,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
break;
case FLOAT:
output.writeByte((byte) 1);
output.writeLong(Float.floatToIntBits(((Float) missingValue).floatValue()));
output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());

View File

@ -136,9 +136,9 @@ final class MultiSorter {
}
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = Integer.MIN_VALUE;
} else {
missingOrd = Integer.MAX_VALUE;
} else {
missingOrd = Integer.MIN_VALUE;
}
final int reverseMul;
@ -180,10 +180,10 @@ final class MultiSorter {
reverseMul = 1;
}
final int missingValue;
final long missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
missingValue = (Long) sortField.getMissingValue();
} else {
missingValue = 0;
}
@ -193,14 +193,14 @@ final class MultiSorter {
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
long valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = (int) values.get(readerIndexA).get(docIDA);
valueA = values.get(readerIndexA).get(docIDA);
} else {
valueA = missingValue;
}
long valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = (int) values.get(readerIndexB).get(docIDB);
valueB = values.get(readerIndexB).get(docIDB);
} else {
valueB = missingValue;
}

View File

@ -48,6 +48,8 @@ final class SegmentMerger {
final MergeState mergeState;
private final FieldInfos.Builder fieldInfosBuilder;
// nocommit make sure infoStream states per-segment-being-merged if they are already sorted
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(List<CodecReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
@ -59,6 +61,11 @@ final class SegmentMerger {
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
if (mergeState.infoStream.isEnabled("SM")) {
if (segmentInfo.getIndexSort() != null) {
mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort());
}
}
}
/** True if any merging should happen */

View File

@ -118,6 +118,76 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
/**
 * Index-sorts on a STRING field with missing values configured as {@code SortField.STRING_FIRST}
 * and checks that, after merging, the document without a value precedes "mmm" and "zzz".
 */
public void testMissingStringFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingFirst = new SortField("foo", SortField.Type.STRING);
  missingFirst.setMissingValue(SortField.STRING_FIRST);
  iwc.setIndexSort(new Sort(missingFirst));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the value that should end up last.
  Document zzzDoc = new Document();
  zzzDoc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
  writer.addDocument(zzzDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the value that should sort between the missing doc and "zzz".
  Document mmmDoc = new Document();
  mmmDoc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
  writer.addDocument(mmmDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  SortedDocValues sorted = leaf.getSortedDocValues("foo");
  // The value-less document is expected at position 0, reporting ord -1.
  assertEquals(-1, sorted.getOrd(0));
  assertEquals("mmm", sorted.get(1).utf8ToString());
  assertEquals("zzz", sorted.get(2).utf8ToString());

  reader.close();
  writer.close();
  dir.close();
}
/**
 * Index-sorts on a STRING field with missing values configured as {@code SortField.STRING_LAST}
 * and checks that, after merging, "mmm" and "zzz" precede the document without a value.
 */
public void testMissingStringLast() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingLast = new SortField("foo", SortField.Type.STRING);
  missingLast.setMissingValue(SortField.STRING_LAST);
  iwc.setIndexSort(new Sort(missingLast));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document zzzDoc = new Document();
  zzzDoc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
  writer.addDocument(zzzDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document mmmDoc = new Document();
  mmmDoc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
  writer.addDocument(mmmDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  SortedDocValues sorted = leaf.getSortedDocValues("foo");
  assertEquals("mmm", sorted.get(0).utf8ToString());
  assertEquals("zzz", sorted.get(1).utf8ToString());
  // The value-less document is expected at the end, reporting ord -1.
  assertEquals(-1, sorted.getOrd(2));

  reader.close();
  writer.close();
  dir.close();
}
public void testBasicLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@ -152,6 +222,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
/**
 * Index-sorts on a LONG field whose missing value is {@code Long.MIN_VALUE} and checks that,
 * after merging, the document without a value precedes 7 and 18.
 */
public void testMissingLongFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingFirst = new SortField("foo", SortField.Type.LONG);
  missingFirst.setMissingValue(Long.valueOf(Long.MIN_VALUE));
  iwc.setIndexSort(new Sort(missingFirst));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new NumericDocValuesField("foo", 18));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new NumericDocValuesField("foo", 7));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues numbers = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Position 0 is expected to be the value-less document: it reads as 0 and is not flagged.
  assertEquals(0, numbers.get(0));
  assertFalse(hasValue.get(0));
  assertEquals(7, numbers.get(1));
  assertEquals(18, numbers.get(2));

  reader.close();
  writer.close();
  dir.close();
}
/**
 * Index-sorts on a LONG field whose missing value is {@code Long.MAX_VALUE} and checks that,
 * after merging, 7 and 18 precede the document without a value.
 */
public void testMissingLongLast() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingLast = new SortField("foo", SortField.Type.LONG);
  missingLast.setMissingValue(Long.valueOf(Long.MAX_VALUE));
  iwc.setIndexSort(new Sort(missingLast));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new NumericDocValuesField("foo", 18));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new NumericDocValuesField("foo", 7));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues numbers = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  assertEquals(7, numbers.get(0));
  assertEquals(18, numbers.get(1));
  // Position 2 is expected to be the value-less document: it reads as 0 and is not flagged.
  assertEquals(0, numbers.get(2));
  assertFalse(hasValue.get(2));

  reader.close();
  writer.close();
  dir.close();
}
public void testBasicInt() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@ -186,6 +330,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
/**
 * Index-sorts on an INT field whose missing value is {@code Integer.MIN_VALUE} and checks that,
 * after merging, the document without a value precedes 7 and 18.
 */
public void testMissingIntFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingFirst = new SortField("foo", SortField.Type.INT);
  missingFirst.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
  iwc.setIndexSort(new Sort(missingFirst));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new NumericDocValuesField("foo", 18));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new NumericDocValuesField("foo", 7));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues numbers = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Position 0 is expected to be the value-less document: it reads as 0 and is not flagged.
  assertEquals(0, numbers.get(0));
  assertFalse(hasValue.get(0));
  assertEquals(7, numbers.get(1));
  assertEquals(18, numbers.get(2));

  reader.close();
  writer.close();
  dir.close();
}
/**
 * Index-sorts on an INT field whose missing value is {@code Integer.MAX_VALUE} and checks that,
 * after merging, 7 and 18 precede the document without a value.
 */
public void testMissingIntLast() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingLast = new SortField("foo", SortField.Type.INT);
  missingLast.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
  iwc.setIndexSort(new Sort(missingLast));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new NumericDocValuesField("foo", 18));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new NumericDocValuesField("foo", 7));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues numbers = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  assertEquals(7, numbers.get(0));
  assertEquals(18, numbers.get(1));
  // Position 2 is expected to be the value-less document: it reads as 0 and is not flagged.
  assertEquals(0, numbers.get(2));
  assertFalse(hasValue.get(2));

  reader.close();
  writer.close();
  dir.close();
}
public void testBasicDouble() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@ -220,6 +438,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
/**
 * Index-sorts on a DOUBLE field whose missing value is {@code Double.NEGATIVE_INFINITY} and
 * checks that, after merging, the document without a value precedes 7.0 and 18.0.
 */
public void testMissingDoubleFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingFirst = new SortField("foo", SortField.Type.DOUBLE);
  missingFirst.setMissingValue(Double.NEGATIVE_INFINITY);
  iwc.setIndexSort(new Sort(missingFirst));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new DoubleDocValuesField("foo", 18.0));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new DoubleDocValuesField("foo", 7.0));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues rawBits = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Stored longs are decoded back to doubles before comparing.
  // Position 0 is expected to be the value-less document: it decodes to 0.0 and is not flagged.
  assertEquals(0.0, Double.longBitsToDouble(rawBits.get(0)), 0.0);
  assertFalse(hasValue.get(0));
  assertEquals(7.0, Double.longBitsToDouble(rawBits.get(1)), 0.0);
  assertEquals(18.0, Double.longBitsToDouble(rawBits.get(2)), 0.0);

  reader.close();
  writer.close();
  dir.close();
}
/**
 * Index-sorts on a DOUBLE field whose missing value is {@code Double.POSITIVE_INFINITY} and
 * checks that, after merging, 7.0 and 18.0 precede the document without a value.
 */
public void testMissingDoubleLast() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingLast = new SortField("foo", SortField.Type.DOUBLE);
  missingLast.setMissingValue(Double.POSITIVE_INFINITY);
  iwc.setIndexSort(new Sort(missingLast));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new DoubleDocValuesField("foo", 18.0));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new DoubleDocValuesField("foo", 7.0));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues rawBits = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Stored longs are decoded back to doubles before comparing.
  assertEquals(7.0, Double.longBitsToDouble(rawBits.get(0)), 0.0);
  assertEquals(18.0, Double.longBitsToDouble(rawBits.get(1)), 0.0);
  // Position 2 is expected to be the value-less document: it decodes to 0.0 and is not flagged.
  assertEquals(0.0, Double.longBitsToDouble(rawBits.get(2)), 0.0);
  assertFalse(hasValue.get(2));

  reader.close();
  writer.close();
  dir.close();
}
public void testBasicFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@ -254,7 +546,82 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
public void testSortOnMerge(boolean withDeletes) throws IOException {
/**
 * Index-sorts on a FLOAT field whose missing value is {@code Float.NEGATIVE_INFINITY} and
 * checks that, after merging, the document without a value precedes 7.0f and 18.0f.
 */
public void testMissingFloatFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingFirst = new SortField("foo", SortField.Type.FLOAT);
  missingFirst.setMissingValue(Float.NEGATIVE_INFINITY);
  iwc.setIndexSort(new Sort(missingFirst));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new FloatDocValuesField("foo", 18.0f));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new FloatDocValuesField("foo", 7.0f));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues rawBits = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Stored longs are narrowed to int and decoded back to floats before comparing.
  // Position 0 is expected to be the value-less document: it decodes to 0.0f and is not flagged.
  assertEquals(0.0f, Float.intBitsToFloat((int) rawBits.get(0)), 0.0f);
  assertFalse(hasValue.get(0));
  assertEquals(7.0f, Float.intBitsToFloat((int) rawBits.get(1)), 0.0f);
  assertEquals(18.0f, Float.intBitsToFloat((int) rawBits.get(2)), 0.0f);

  reader.close();
  writer.close();
  dir.close();
}
/**
 * Index-sorts on a FLOAT field whose missing value is {@code Float.POSITIVE_INFINITY} and
 * checks that, after merging, 7.0f and 18.0f precede the document without a value.
 */
public void testMissingFloatLast() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField missingLast = new SortField("foo", SortField.Type.FLOAT);
  missingLast.setMissingValue(Float.POSITIVE_INFINITY);
  iwc.setIndexSort(new Sort(missingLast));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // First segment: the larger of the two real values.
  Document eighteenDoc = new Document();
  eighteenDoc.add(new FloatDocValuesField("foo", 18.0f));
  writer.addDocument(eighteenDoc);
  // Commit after each document so there are several segments; forceMerge then has real
  // merging to do, and merging is the only way a sorted segment gets produced here.
  writer.commit();

  // Second segment: a document with no value for the sort field.
  writer.addDocument(new Document());
  writer.commit();

  // Third document: the smaller of the two real values.
  Document sevenDoc = new Document();
  sevenDoc.add(new FloatDocValuesField("foo", 7.0f));
  writer.addDocument(sevenDoc);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader leaf = getOnlyLeafReader(reader);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues rawBits = leaf.getNumericDocValues("foo");
  Bits hasValue = leaf.getDocsWithField("foo");
  // Stored longs are narrowed to int and decoded back to floats before comparing.
  assertEquals(7.0f, Float.intBitsToFloat((int) rawBits.get(0)), 0.0f);
  assertEquals(18.0f, Float.intBitsToFloat((int) rawBits.get(1)), 0.0f);
  // Position 2 is expected to be the value-less document: it decodes to 0.0f and is not flagged.
  assertEquals(0.0f, Float.intBitsToFloat((int) rawBits.get(2)), 0.0f);
  assertFalse(hasValue.get(2));

  reader.close();
  writer.close();
  dir.close();
}
public void testRandom1() throws IOException {
boolean withDeletes = random().nextBoolean();
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
@ -323,14 +690,6 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
/** Runs the parameterized {@code testSortOnMerge(boolean withDeletes)} with {@code withDeletes} set to false. */
public void testSortOnMerge() throws IOException {
testSortOnMerge(false);
}
/** Runs the parameterized {@code testSortOnMerge(boolean withDeletes)} with {@code withDeletes} set to true. */
public void testSortOnMergeWithDeletes() throws IOException {
testSortOnMerge(true);
}
static class UpdateRunnable implements Runnable {
private final int numDocs;
@ -666,9 +1025,7 @@ public class TestIndexSorting extends LuceneTestCase {
}
}
// nocommit testrandom1 with deletions
public void testRandom1() throws Exception {
public void testRandom2() throws Exception {
int numDocs = atLeast(100);
FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);