LUCENE-9521: Fix TestPassageSelector.randomizedSanityCheck failure (ensure the input ranges passed to the formatter lie within the length of the input value).

This commit is contained in:
Dawid Weiss 2020-09-14 12:02:13 +02:00
parent 8f895d9075
commit 978446e4fd
2 changed files with 30 additions and 9 deletions

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.RandomAccess;
import java.util.function.Function;
@ -55,6 +56,7 @@ public class PassageFormatter {
public List<String> format(CharSequence value, List<Passage> passages, List<OffsetRange> ranges) {
assert PassageSelector.sortedAndNonOverlapping(passages);
assert PassageSelector.sortedAndNonOverlapping(ranges);
assert withinRange(new OffsetRange(0, value.length()), passages);
assert ranges instanceof RandomAccess;
if (ranges.isEmpty()) {
@ -91,6 +93,17 @@ public class PassageFormatter {
return result;
}
/**
 * Verifies that every range in {@code contained} lies inside {@code limits}.
 *
 * <p>The method is invoked from an {@code assert} statement in this class, which is why it
 * reports a violation by throwing and otherwise returns a constant {@code true}.
 *
 * @param limits the permitted bounds
 * @param contained the ranges to check against {@code limits}
 * @return always {@code true} when no range violates the limits
 * @throws AssertionError if a range starts before {@code limits.from} or ends after
 *     {@code limits.to}
 */
private boolean withinRange(OffsetRange limits, List<? extends OffsetRange> contained) {
  for (OffsetRange candidate : contained) {
    boolean startsTooEarly = candidate.from < limits.from;
    boolean endsTooLate = candidate.to > limits.to;
    if (startsTooEarly || endsTooLate) {
      // Locale.ROOT keeps the message formatting independent of the default locale.
      String message = String.format(Locale.ROOT,
          "Range outside of the permitted limit (limit = %s): %s",
          limits, candidate);
      throw new AssertionError(message);
    }
  }
  return true;
}
public StringBuilder format(StringBuilder buf, CharSequence value, final Passage passage) {
switch (passage.markers.size()) {
case 0:
@ -118,7 +131,9 @@ public class PassageFormatter {
return buf;
}
/** Handle multiple markers, possibly overlapping or nested. */
/**
* Handle multiple markers, possibly overlapping or nested.
*/
private void multipleMarkers(
CharSequence value, final Passage p, StringBuilder b, ArrayList<OffsetRange> markerStack) {
int at = p.from;

View File

@ -27,7 +27,6 @@ import java.util.Objects;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
@LuceneTestCase.AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9521")
public class TestPassageSelector extends LuceneTestCase {
@Test
public void checkEmptyExtra() {
@ -241,19 +240,23 @@ public class TestPassageSelector extends LuceneTestCase {
PassageSelector selector = new PassageSelector();
PassageFormatter formatter = new PassageFormatter("...", ">", "<");
ArrayList<OffsetRange> highlights = new ArrayList<>();
ArrayList<OffsetRange> ranges = new ArrayList<>();
ArrayList<OffsetRange> permittedRanges = new ArrayList<>();
for (int i = 0; i < 5000; i++) {
String value =
randomBoolean()
? randomAsciiLettersOfLengthBetween(0, 100)
: randomRealisticUnicodeOfCodepointLengthBetween(0, 1000);
int maxLength = value.length();
ranges.clear();
permittedRanges.clear();
highlights.clear();
for (int j = randomIntBetween(0, 10); --j >= 0; ) {
int from = randomIntBetween(0, value.length());
highlights.add(new OffsetRange(from, from + randomIntBetween(1, 10)));
int to = Math.min(from + randomIntBetween(1, 10), maxLength);
if (from < to) {
highlights.add(new OffsetRange(from, to));
}
}
int charWindow = randomIntBetween(1, 100);
@ -263,17 +266,20 @@ public class TestPassageSelector extends LuceneTestCase {
int increment = value.length() / 10;
for (int c = randomIntBetween(0, 20), start = 0; --c >= 0; ) {
int step = randomIntBetween(0, increment);
ranges.add(new OffsetRange(start, start + step));
int to = Math.min(start + step, maxLength);
if (start < to) {
permittedRanges.add(new OffsetRange(start, to));
}
start += step + randomIntBetween(0, 3);
}
} else {
ranges.add(new OffsetRange(0, value.length()));
permittedRanges.add(new OffsetRange(0, value.length()));
}
// Just make sure there are no exceptions.
List<Passage> passages =
selector.pickBest(value, highlights, charWindow, maxPassages, ranges);
formatter.format(value, passages, ranges);
selector.pickBest(value, highlights, charWindow, maxPassages, permittedRanges);
formatter.format(value, passages, permittedRanges);
}
}