mirror of https://github.com/apache/lucene.git
LUCENE-8592: Fix index sorting corruption due to numeric overflow
The merge sort of sorted segments can produce an invalid sort if the sort field is an Integer/Long that uses reverse order and contains values equal to Integer/Long#MIN_VALUE. These values are always sorted first during a merge (instead of last because of the reverse order) due to this bug. Indices affected by the bug can be detected by running the CheckIndex command on a distribution that contains the fix (7.6+).
This commit is contained in:
parent
da7919f7a4
commit
9f29ed0757
|
@ -314,7 +314,14 @@ Bug fixes
|
|||
|
||||
* LUCENE-8595: Fix interleaved DV update and reset. Interleaved update and reset value
|
||||
to the same doc in the same updates package looses an update if the reset comes before
|
||||
the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grant)
|
||||
the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grand)
|
||||
|
||||
* LUCENE-8592: Fix index sorting corruption due to numeric overflow. The merge of sorted segments
|
||||
can produce an invalid sort if the sort field is an Integer/Long that uses reverse order and contains
|
||||
values equal to Integer/Long#MIN_VALUE. These values are always sorted first during a merge
|
||||
(instead of last because of the reverse order) due to this bug. Indices affected by the bug can be
|
||||
detected by running the CheckIndex command on a distribution that contains the fix (7.6+).
|
||||
(Jim Ferenczi, Adrien Grand, Mike McCandless, Simon Willnauer)
|
||||
|
||||
New Features
|
||||
|
||||
|
|
|
@ -1717,6 +1717,33 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that {@link CheckIndex} can detect invalid sort on sorted indices created
|
||||
* before https://issues.apache.org/jira/browse/LUCENE-8592.
|
||||
*/
|
||||
public void testSortedIndexWithInvalidSort() throws Exception {
|
||||
Path path = createTempDir("sorted");
|
||||
String name = "sorted-invalid.7.5.0.zip";
|
||||
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
|
||||
assertNotNull("Sorted index index " + name + " not found", resource);
|
||||
TestUtil.unzip(resource, path);
|
||||
|
||||
Directory dir = FSDirectory.open(path);
|
||||
|
||||
DirectoryReader reader = DirectoryReader.open(dir);
|
||||
assertEquals(1, reader.leaves().size());
|
||||
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
|
||||
assertNotNull(sort);
|
||||
assertEquals("<long: \"dateDV\">! missingValue=-9223372036854775808", sort.toString());
|
||||
reader.close();
|
||||
CheckIndex.Status status = TestUtil.checkIndex(dir);
|
||||
assertEquals(1, status.segmentInfos.size());
|
||||
assertNotNull(status.segmentInfos.get(0).indexSortStatus.error);
|
||||
assertEquals(status.segmentInfos.get(0).indexSortStatus.error.getMessage(),
|
||||
"segment has indexSort=<long: \"dateDV\">! missingValue=-9223372036854775808 but docID=4 sorts after docID=5");
|
||||
dir.close();
|
||||
}
|
||||
|
||||
static long getValue(BinaryDocValues bdv) throws IOException {
|
||||
BytesRef term = bdv.binaryValue();
|
||||
|
|
Binary file not shown.
|
@ -42,17 +42,18 @@ final class MultiSorter {
|
|||
|
||||
SortField fields[] = sort.getSort();
|
||||
final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
|
||||
final int[] reverseMuls = new int[fields.length];
|
||||
for(int i=0;i<fields.length;i++) {
|
||||
comparables[i] = getComparableProviders(readers, fields[i]);
|
||||
reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
|
||||
}
|
||||
|
||||
int leafCount = readers.size();
|
||||
|
||||
PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
|
||||
@Override
|
||||
public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
|
||||
for(int i=0;i<comparables.length;i++) {
|
||||
int cmp = a.values[i].compareTo(b.values[i]);
|
||||
int cmp = reverseMuls[i] * a.values[i].compareTo(b.values[i]);
|
||||
if (cmp != 0) {
|
||||
return cmp < 0;
|
||||
}
|
||||
|
@ -146,14 +147,13 @@ final class MultiSorter {
|
|||
|
||||
/** Returns an object for this docID whose .compareTo represents the requested {@link SortField} sort order. */
|
||||
private interface ComparableProvider {
|
||||
public Comparable getComparable(int docID) throws IOException;
|
||||
Comparable getComparable(int docID) throws IOException;
|
||||
}
|
||||
|
||||
/** Returns {@code ComparableProvider}s for the provided readers to represent the requested {@link SortField} sort order. */
|
||||
private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
|
||||
|
||||
ComparableProvider[] providers = new ComparableProvider[readers.size()];
|
||||
final int reverseMul = sortField.getReverse() ? -1 : 1;
|
||||
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
|
||||
|
||||
switch(sortType) {
|
||||
|
@ -169,9 +169,9 @@ final class MultiSorter {
|
|||
OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
|
||||
final int missingOrd;
|
||||
if (sortField.getMissingValue() == SortField.STRING_LAST) {
|
||||
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
|
||||
missingOrd = Integer.MAX_VALUE;
|
||||
} else {
|
||||
missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
|
||||
missingOrd = Integer.MIN_VALUE;
|
||||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
|
@ -197,7 +197,7 @@ final class MultiSorter {
|
|||
}
|
||||
if (readerDocID == docID) {
|
||||
// translate segment's ord to global ord space:
|
||||
return reverseMul * (int) globalOrds.get(readerValues.ordValue());
|
||||
return Math.toIntExact(globalOrds.get(readerValues.ordValue()));
|
||||
} else {
|
||||
return missingOrd;
|
||||
}
|
||||
|
@ -238,9 +238,9 @@ final class MultiSorter {
|
|||
readerDocID = values.advance(docID);
|
||||
}
|
||||
if (readerDocID == docID) {
|
||||
return reverseMul * values.longValue();
|
||||
return values.longValue();
|
||||
} else {
|
||||
return reverseMul * missingValue;
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -279,9 +279,9 @@ final class MultiSorter {
|
|||
readerDocID = values.advance(docID);
|
||||
}
|
||||
if (readerDocID == docID) {
|
||||
return reverseMul * (int) values.longValue();
|
||||
return (int) values.longValue();
|
||||
} else {
|
||||
return reverseMul * missingValue;
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -320,9 +320,9 @@ final class MultiSorter {
|
|||
readerDocID = values.advance(docID);
|
||||
}
|
||||
if (readerDocID == docID) {
|
||||
return reverseMul * Double.longBitsToDouble(values.longValue());
|
||||
return Double.longBitsToDouble(values.longValue());
|
||||
} else {
|
||||
return reverseMul * missingValue;
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -361,9 +361,9 @@ final class MultiSorter {
|
|||
readerDocID = values.advance(docID);
|
||||
}
|
||||
if (readerDocID == docID) {
|
||||
return reverseMul * Float.intBitsToFloat((int) values.longValue());
|
||||
return Float.intBitsToFloat((int) values.longValue());
|
||||
} else {
|
||||
return reverseMul * missingValue;
|
||||
return missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue