mirror of https://github.com/apache/lucene.git
LUCENE-9212: Intervals.multiterm() should take CompiledAutomaton
This commit is contained in:
parent
663611c99c
commit
ffb7cafe93
|
@ -114,6 +114,9 @@ API Changes
|
|||
|
||||
* LUCENE-9218: XY geometries API works in float space. (Ignacio Vera)
|
||||
|
||||
* LUCENE-9212: Intervals.multiterm() takes CompiledAutomaton rather than plain Automaton
|
||||
(Alan Woodward)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -203,7 +203,10 @@ public final class Intervals {
|
|||
* @param pattern string representation of the given automaton, mostly used in exception messages
|
||||
*
|
||||
* @throws IllegalStateException if the automaton accepts more than 128 terms
|
||||
*
|
||||
* @deprecated use {@link #multiterm(CompiledAutomaton, String)}
|
||||
*/
|
||||
@Deprecated
|
||||
public static IntervalsSource multiterm(Automaton automaton, String pattern) {
|
||||
// Delegate to the three-argument overload with the limit of 128 terms named in the @throws clause.
return multiterm(automaton, 128, pattern);
|
||||
}
|
||||
|
@ -219,11 +222,42 @@ public final class Intervals {
|
|||
* @param pattern string representation of the given automaton, mostly used in exception messages
|
||||
*
|
||||
* @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
|
||||
*
|
||||
* @deprecated use {@link #multiterm(CompiledAutomaton, int, String)}
|
||||
*/
|
||||
@Deprecated
|
||||
public static IntervalsSource multiterm(Automaton automaton, int maxExpansions, String pattern) {
|
||||
// Compile the plain automaton, then build the source the same way the CompiledAutomaton overload does.
CompiledAutomaton ca = new CompiledAutomaton(automaton);
|
||||
return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
|
||||
*
|
||||
* @param ca a compiled automaton accepting matching terms
|
||||
* @param pattern string representation of the given automaton, mostly used in exception messages
|
||||
* @return an {@link IntervalsSource} built with a limit of 128 term expansions
*
|
||||
* @throws IllegalStateException if the automaton accepts more than 128 terms
|
||||
*/
|
||||
public static IntervalsSource multiterm(CompiledAutomaton ca, String pattern) {
|
||||
return multiterm(ca, 128, pattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted by the given automaton
|
||||
*
|
||||
* WARNING: Setting {@code maxExpansions} higher than the default value of 128
|
||||
* can be both slow and memory-intensive
|
||||
*
|
||||
* @param ca a compiled automaton accepting matching terms
|
||||
* @param maxExpansions the maximum number of terms to expand to
|
||||
* @param pattern string representation of the given automaton, mostly used in exception messages
|
||||
* @return an {@link IntervalsSource} built from {@code ca}, {@code maxExpansions} and {@code pattern}
*
|
||||
* @throws IllegalStateException if the automaton accepts more than {@code maxExpansions} terms
|
||||
*/
|
||||
public static IntervalsSource multiterm(CompiledAutomaton ca, int maxExpansions, String pattern) {
|
||||
return new MultiTermIntervalsSource(ca, maxExpansions, pattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.queries.intervals;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -44,7 +43,6 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -52,6 +50,8 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
@ -886,36 +886,30 @@ public class TestIntervals extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testPrefix() throws IOException {
|
||||
for (IntervalsSource source : List.of(Intervals.prefix(new BytesRef("p")),
|
||||
Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), "p*" ) )) {
|
||||
checkIntervals(source, "field1", 5, new int[][]{
|
||||
{},
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27 },
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
|
||||
{ 7, 7 },
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10 },
|
||||
{ 0, 0 }
|
||||
});
|
||||
MatchesIterator mi = getMatches(source, 1, "field1");
|
||||
assertNotNull(mi);
|
||||
assertMatch(mi, 0, 0, 0, 5);
|
||||
assertMatch(mi, 1, 1, 6, 14);
|
||||
}
|
||||
IntervalsSource source = Intervals.prefix(new BytesRef("p"));
|
||||
checkIntervals(source, "field1", 5, new int[][]{
|
||||
{},
|
||||
{0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10, 27, 27},
|
||||
{0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
|
||||
{7, 7},
|
||||
{0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7, 10, 10},
|
||||
{0, 0}
|
||||
});
|
||||
MatchesIterator mi = getMatches(source, 1, "field1");
|
||||
assertNotNull(mi);
|
||||
assertMatch(mi, 0, 0, 0, 5);
|
||||
assertMatch(mi, 1, 1, 6, 14);
|
||||
|
||||
for (IntervalsSource noSuch : List.of(Intervals.prefix(new BytesRef("qqq")),
|
||||
Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("qqq")), "qqq*" ))) {
|
||||
checkIntervals(noSuch, "field1", 0, new int[][]{});
|
||||
}
|
||||
IntervalsSource noSuch = Intervals.prefix(new BytesRef("qqq"));
|
||||
checkIntervals(noSuch, "field1", 0, new int[][]{});
|
||||
|
||||
for (IntervalsSource source : List.of(Intervals.prefix(new BytesRef("p"), 1),
|
||||
Intervals.multiterm(PrefixQuery.toAutomaton(new BytesRef("p")), 1, "p*")) ) {
|
||||
IntervalsSource s = Intervals.prefix(new BytesRef("p"), 1);
|
||||
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
|
||||
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
|
||||
source.intervals("field1", ctx);
|
||||
s.intervals("field1", ctx);
|
||||
}
|
||||
});
|
||||
assertEquals("Automaton [p*] expanded to too many terms (limit 1)", e.getMessage());
|
||||
}
|
||||
|
||||
checkVisits(Intervals.prefix(new BytesRef("p")), 1);
|
||||
}
|
||||
|
@ -965,4 +959,28 @@ public class TestIntervals extends LuceneTestCase {
|
|||
|
||||
}
|
||||
|
||||
// Exercises Intervals.multiterm() with a CompiledAutomaton built from a regular expression.
public void testMultiTerm() throws IOException {
|
||||
RegExp re = new RegExp("p.*e");
|
||||
// Uses the two-argument overload, so no explicit expansion limit is passed here.
IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());
|
||||
|
||||
// Compare the intervals produced on field1 against the expected values below.
checkIntervals(source, "field1", 5, new int[][]{
|
||||
{},
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
|
||||
{ 7, 7 },
|
||||
{ 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
|
||||
{ 0, 0 }
|
||||
});
|
||||
|
||||
// With maxExpansions set to 1, requesting intervals is expected to fail with an IllegalStateException.
IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
|
||||
IntervalsSource s = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
|
||||
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
|
||||
s.intervals("field1", ctx);
|
||||
}
|
||||
});
|
||||
assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", e.getMessage());
|
||||
|
||||
checkVisits(source, 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue