LUCENE-8592: Fix index sorting corruption due to numeric overflow

The merge sort of sorted segments can produce an invalid
sort if the sort field is an Integer/Long that uses reverse order and contains values equal to
Integer/Long#MIN_VALUE. These values are always sorted first during a merge
(instead of last because of the reverse order) due to this bug.
Indices affected by the bug can be detected by running the CheckIndex command on a
distribution that contains the fix (7.6+).
This commit is contained in:
Jim Ferenczi 2018-12-07 19:16:42 +01:00
parent da7919f7a4
commit 9f29ed0757
5 changed files with 941 additions and 705 deletions

View File

@ -314,7 +314,14 @@ Bug fixes
* LUCENE-8595: Fix interleaved DV update and reset. Interleaved update and reset value
to the same doc in the same updates package looses an update if the reset comes before
the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grant)
the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grand)
* LUCENE-8592: Fix index sorting corruption due to numeric overflow. The merge of sorted segments
can produce an invalid sort if the sort field is an Integer/Long that uses reverse order and contains
values equal to Integer/Long#MIN_VALUE. These values are always sorted first during a merge
(instead of last because of the reverse order) due to this bug. Indices affected by the bug can be
detected by running the CheckIndex command on a distribution that contains the fix (7.6+).
(Jim Ferenczi, Adrien Grand, Mike McCandless, Simon Willnauer)
New Features

View File

@ -1717,6 +1717,33 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
}
/**
* Tests that {@link CheckIndex} can detect invalid sort on sorted indices created
* before https://issues.apache.org/jira/browse/LUCENE-8592.
*/
public void testSortedIndexWithInvalidSort() throws Exception {
Path path = createTempDir("sorted");
String name = "sorted-invalid.7.5.0.zip";
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
assertNotNull("Sorted index index " + name + " not found", resource);
TestUtil.unzip(resource, path);
Directory dir = FSDirectory.open(path);
DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size());
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
assertNotNull(sort);
assertEquals("<long: \"dateDV\">! missingValue=-9223372036854775808", sort.toString());
reader.close();
CheckIndex.Status status = TestUtil.checkIndex(dir);
assertEquals(1, status.segmentInfos.size());
assertNotNull(status.segmentInfos.get(0).indexSortStatus.error);
assertEquals(status.segmentInfos.get(0).indexSortStatus.error.getMessage(),
"segment has indexSort=<long: \"dateDV\">! missingValue=-9223372036854775808 but docID=4 sorts after docID=5");
dir.close();
}
static long getValue(BinaryDocValues bdv) throws IOException {
BytesRef term = bdv.binaryValue();

View File

@ -42,17 +42,18 @@ final class MultiSorter {
SortField fields[] = sort.getSort();
final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
final int[] reverseMuls = new int[fields.length];
for(int i=0;i<fields.length;i++) {
comparables[i] = getComparableProviders(readers, fields[i]);
reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
}
int leafCount = readers.size();
PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
@Override
public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
for(int i=0;i<comparables.length;i++) {
int cmp = a.values[i].compareTo(b.values[i]);
int cmp = reverseMuls[i] * a.values[i].compareTo(b.values[i]);
if (cmp != 0) {
return cmp < 0;
}
@ -146,14 +147,13 @@ final class MultiSorter {
/** Returns an object for this docID whose .compareTo represents the requested {@link SortField} sort order. */
private interface ComparableProvider {
public Comparable getComparable(int docID) throws IOException;
Comparable getComparable(int docID) throws IOException;
}
/** Returns {@code ComparableProvider}s for the provided readers to represent the requested {@link SortField} sort order. */
private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
ComparableProvider[] providers = new ComparableProvider[readers.size()];
final int reverseMul = sortField.getReverse() ? -1 : 1;
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
switch(sortType) {
@ -169,9 +169,9 @@ final class MultiSorter {
OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
missingOrd = Integer.MAX_VALUE;
} else {
missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
missingOrd = Integer.MIN_VALUE;
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
@ -197,7 +197,7 @@ final class MultiSorter {
}
if (readerDocID == docID) {
// translate segment's ord to global ord space:
return reverseMul * (int) globalOrds.get(readerValues.ordValue());
return Math.toIntExact(globalOrds.get(readerValues.ordValue()));
} else {
return missingOrd;
}
@ -238,9 +238,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
return reverseMul * values.longValue();
return values.longValue();
} else {
return reverseMul * missingValue;
return missingValue;
}
}
};
@ -279,9 +279,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
return reverseMul * (int) values.longValue();
return (int) values.longValue();
} else {
return reverseMul * missingValue;
return missingValue;
}
}
};
@ -320,9 +320,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
return reverseMul * Double.longBitsToDouble(values.longValue());
return Double.longBitsToDouble(values.longValue());
} else {
return reverseMul * missingValue;
return missingValue;
}
}
};
@ -361,9 +361,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
return reverseMul * Float.intBitsToFloat((int) values.longValue());
return Float.intBitsToFloat((int) values.longValue());
} else {
return reverseMul * missingValue;
return missingValue;
}
}
};