LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals

This commit is contained in:
Alan Woodward 2019-06-10 15:49:15 +01:00
parent f84afab008
commit e8950f4a52
4 changed files with 55 additions and 4 deletions

View File

@ -85,6 +85,9 @@ Improvements
* LUCENE-8818: Fix smokeTestRelease.py encoding bug (janhoy)
* LUCENE-8845: Allow Intervals.prefix() and Intervals.wildcard() to specify
their maximum allowed expansions (Alan Woodward)
Test Framework
* LUCENE-8825: CheckHits now display the shard index in case of mismatch

View File

@ -147,8 +147,23 @@ public final class Intervals {
* @throws IllegalStateException if the prefix expands to more than 128 terms
*/
public static IntervalsSource prefix(String prefix) {
  // Delegate to the configurable overload with the default cap of 128 expanded terms.
  final int defaultMaxExpansions = 128;
  return prefix(prefix, defaultMaxExpansions);
}
/**
 * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix.
 *
 * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
 * can be both slow and memory-intensive.
 *
 * @param prefix the prefix to expand
 * @param maxExpansions the maximum number of terms to expand to
 *
 * @return a source over all terms matching the prefix
 *
 * @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
 */
public static IntervalsSource prefix(String prefix, int maxExpansions) {
  CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef(prefix)));
  // Pass the caller's limit through (not a hard-coded 128) and label the pattern as
  // "<prefix>*" so the too-many-terms error message identifies it as a prefix expansion.
  return new MultiTermIntervalsSource(ca, maxExpansions, prefix + "*");
}
/**
@ -159,8 +174,25 @@ public final class Intervals {
* @see WildcardQuery for glob format
*/
public static IntervalsSource wildcard(String wildcard) {
  // Delegate to the configurable overload with the default cap of 128 expanded terms.
  final int defaultMaxExpansions = 128;
  return wildcard(wildcard, defaultMaxExpansions);
}
/**
 * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob.
 *
 * WARNING: Setting {@code maxExpansions} to higher than the default value of 128
 * can be both slow and memory-intensive.
 *
 * @param wildcard the glob to expand
 * @param maxExpansions the maximum number of terms to expand to
 *
 * @return a source over all terms matching the glob
 *
 * @throws IllegalStateException if the wildcard glob expands to more than {@code maxExpansions} terms
 *
 * @see WildcardQuery for glob format
 */
public static IntervalsSource wildcard(String wildcard, int maxExpansions) {
  // The automaton is built from a Term with an empty field name, as in the original code.
  CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard)));
  // Pass the caller's limit through instead of a hard-coded 128.
  return new MultiTermIntervalsSource(ca, maxExpansions, wildcard);
}
/**

View File

@ -57,8 +57,8 @@ class MultiTermIntervalsSource extends IntervalsSource {
int count = 0;
while ((term = te.next()) != null) {
subSources.add(TermIntervalsSource.intervals(term, te));
if (count++ > maxExpansions) {
throw new IllegalStateException("Automaton " + this.pattern + " expanded to too many terms (limit " + maxExpansions + ")");
if (++count > maxExpansions) {
throw new IllegalStateException("Automaton [" + this.pattern + "] expanded to too many terms (limit " + maxExpansions + ")");
}
}
if (subSources.size() == 0) {

View File

@ -753,6 +753,14 @@ public class TestIntervals extends LuceneTestCase {
IntervalsSource noSuch = Intervals.prefix("qqq");
checkIntervals(noSuch, "field1", 0, new int[][]{});
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
IntervalsSource s = Intervals.prefix("p", 1);
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
s.intervals("field1", ctx);
}
});
assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
}
public void testWildcard() throws IOException {
@ -770,6 +778,14 @@ public class TestIntervals extends LuceneTestCase {
assertMatch(mi, 2, 2, 15, 18);
assertMatch(mi, 10, 10, 63, 66);
assertMatch(mi, 17, 17, 97, 100);
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
IntervalsSource s = Intervals.wildcard("?ot", 1);
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
s.intervals("field1", ctx);
}
});
assertEquals("Automaton [?ot] expanded to too many terms (limit 1)", e.getMessage());
}
public void testWrappedFilters() throws IOException {