For stability of DisjunctionIntervalsSource.toString(), sort subSources (#193)

Iterators over the subSources of a DisjunctionIntervalsSource may
return elements in indeterminate order, requiring special handling
to make toString() output stable across equivalent instances.
This commit is contained in:
Michael Gibney 2021-06-23 07:53:30 -04:00 committed by GitHub
parent 636d10be64
commit 495bf6730f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 1 deletions

View File

@ -120,7 +120,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
@Override @Override
public String toString() { public String toString() {
return subSources.stream().map(Object::toString).collect(Collectors.joining(",", "or(", ")")); return subSources.stream()
.map(Object::toString)
.sorted()
.collect(Collectors.joining(",", "or(", ")"));
} }
@Override @Override

View File

@ -19,6 +19,7 @@ package org.apache.lucene.queries.intervals;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -401,6 +402,35 @@ public class TestIntervals extends LuceneTestCase {
source, "field1", 3, new int[][] {{}, {0, 3, 2, 6}, {3, 6}, {}, {0, 3, 2, 6}, {}}); source, "field1", 3, new int[][] {{}, {0, 3, 2, 6}, {3, 6}, {}, {0, 3, 2, 6}, {}});
} }
/**
 * Checks that {@code toString()} on a disjunction lists its sub-clauses in sorted order, no matter
 * what order the clauses were supplied in.
 *
 * <p>Which ordering is used is not important in itself; the point is that the rendering is
 * stable, which makes it more useful (e.g. for external comparison across different JVMs).
 */
public void testIntervalDisjunctionToStringStability() {
  // Draw between 4 and 25 clauses so there is always a reasonably large number to order.
  final int clauseCount = 4 + random().nextInt(22);

  // Single-letter terms "a", "b", "c", ... generated in ascending order.
  final String[] letters = new String[clauseCount];
  for (int idx = 0; idx < letters.length; idx++) {
    letters[idx] = String.valueOf((char) ('a' + idx));
  }

  // Capture the expected rendering now, before the array is shuffled in place below.
  final String sortedRendering = "or(" + String.join(",", letters) + ")";

  /*
  NOTE: per the original author, the shuffle is not expected to matter at the moment because the
  disjunction is understood to reorder its sub-sources internally anyway; it is kept so the test
  stays meaningful if the internals of DisjunctionIntervalsSource change.
  */
  Collections.shuffle(Arrays.asList(letters), random());

  final IntervalsSource[] clauses = new IntervalsSource[clauseCount];
  for (int idx = 0; idx < clauseCount; idx++) {
    clauses[idx] = Intervals.term(letters[idx]);
  }

  assertEquals(sortedRendering, Intervals.or(clauses).toString());
}
public void testIntervalDisjunction() throws IOException { public void testIntervalDisjunction() throws IOException {
IntervalsSource source = IntervalsSource source =
Intervals.or(Intervals.term("pease"), Intervals.term("hot"), Intervals.term("notMatching")); Intervals.or(Intervals.term("pease"), Intervals.term("hot"), Intervals.term("notMatching"));