mirror of https://github.com/apache/lucene.git
LUCENE-8845: Allow configurable maxExpansions for prefix/wildcard intervals
This commit is contained in:
parent
f84afab008
commit
e8950f4a52
|
@ -85,6 +85,9 @@ Improvements
|
|||
|
||||
* LUCENE-8818: Fix smokeTestRelease.py encoding bug (janhoy)
|
||||
|
||||
* LUCENE-8845: Allow Intervals.prefix() and Intervals.wildcard() to specify
|
||||
their maximum allowed expansions (Alan Woodward)
|
||||
|
||||
Test Framework
|
||||
|
||||
* LUCENE-8825: CheckHits now displays the shard index in case of mismatch
|
||||
|
|
|
@ -147,8 +147,23 @@ public final class Intervals {
|
|||
* @throws IllegalStateException if the prefix expands to more than 128 terms
|
||||
*/
|
||||
public static IntervalsSource prefix(String prefix) {
|
||||
return prefix(prefix, 128);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix
|
||||
*
|
||||
* WARNING: Setting {@code maxExpansions} to a value higher than the default of 128
|
||||
* can be both slow and memory-intensive
|
||||
*
|
||||
* @param prefix the prefix to expand
|
||||
* @param maxExpansions the maximum number of terms to expand to
|
||||
*
|
||||
* @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms
|
||||
*/
|
||||
public static IntervalsSource prefix(String prefix, int maxExpansions) {
|
||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef(prefix)));
|
||||
return new MultiTermIntervalsSource(ca, 128, prefix);
|
||||
return new MultiTermIntervalsSource(ca, maxExpansions, prefix + "*");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -159,8 +174,25 @@ public final class Intervals {
|
|||
* @see WildcardQuery for glob format
|
||||
*/
|
||||
public static IntervalsSource wildcard(String wildcard) {
|
||||
return wildcard(wildcard, 128);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob
|
||||
*
|
||||
* WARNING: Setting {@code maxExpansions} to higher than the default value of 128
|
||||
* can be both slow and memory-intensive
|
||||
*
|
||||
* @param wildcard the glob to expand
|
||||
* @param maxExpansions the maximum number of terms to expand to
|
||||
*
|
||||
* @throws IllegalStateException if the wildcard glob expands to more than {@code maxExpansions} terms
|
||||
*
|
||||
* @see WildcardQuery for glob format
|
||||
*/
|
||||
public static IntervalsSource wildcard(String wildcard, int maxExpansions) {
|
||||
CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard)));
|
||||
return new MultiTermIntervalsSource(ca, 128, wildcard);
|
||||
return new MultiTermIntervalsSource(ca, maxExpansions, wildcard);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -57,8 +57,8 @@ class MultiTermIntervalsSource extends IntervalsSource {
|
|||
int count = 0;
|
||||
while ((term = te.next()) != null) {
|
||||
subSources.add(TermIntervalsSource.intervals(term, te));
|
||||
if (count++ > maxExpansions) {
|
||||
throw new IllegalStateException("Automaton " + this.pattern + " expanded to too many terms (limit " + maxExpansions + ")");
|
||||
if (++count > maxExpansions) {
|
||||
throw new IllegalStateException("Automaton [" + this.pattern + "] expanded to too many terms (limit " + maxExpansions + ")");
|
||||
}
|
||||
}
|
||||
if (subSources.size() == 0) {
|
||||
|
|
|
@ -753,6 +753,14 @@ public class TestIntervals extends LuceneTestCase {
|
|||
|
||||
IntervalsSource noSuch = Intervals.prefix("qqq");
|
||||
checkIntervals(noSuch, "field1", 0, new int[][]{});
|
||||
|
||||
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
|
||||
IntervalsSource s = Intervals.prefix("p", 1);
|
||||
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
|
||||
s.intervals("field1", ctx);
|
||||
}
|
||||
});
|
||||
assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
|
||||
}
|
||||
|
||||
public void testWildcard() throws IOException {
|
||||
|
@ -770,6 +778,14 @@ public class TestIntervals extends LuceneTestCase {
|
|||
assertMatch(mi, 2, 2, 15, 18);
|
||||
assertMatch(mi, 10, 10, 63, 66);
|
||||
assertMatch(mi, 17, 17, 97, 100);
|
||||
|
||||
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
|
||||
IntervalsSource s = Intervals.wildcard("?ot", 1);
|
||||
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
|
||||
s.intervals("field1", ctx);
|
||||
}
|
||||
});
|
||||
assertEquals("Automaton [?ot] expanded to too many terms (limit 1)", e.getMessage());
|
||||
}
|
||||
|
||||
public void testWrappedFilters() throws IOException {
|
||||
|
|
Loading…
Reference in New Issue